From 5e608d43cc5e20d548042ea81001af117b26d10f Mon Sep 17 00:00:00 2001 From: Maximilian Attems Date: Sun, 4 May 2008 13:28:19 +0000 Subject: [PATCH] update to 2.6.26-rc1 * refresh dfsg orig patches: - usb serial - usb misc * rm dabusb firmware patch as merged * disable bnx2 request firmware patch, fails to apply * nuke old m68k 2.6.24 patches, just in the way svn path=/dists/trunk/linux-2.6/; revision=11252 --- debian/changelog | 2 +- debian/patches/bugfix/all/patch-2.6.25-git18 | 1265601 -------------- .../bugfix/m68k/2.6.24/130-adbraw.diff | 42 - .../bugfix/m68k/2.6.24/134-atari-fat.diff | 101 - .../patches/bugfix/m68k/2.6.24/141-ide.diff | 37 - .../patches/bugfix/m68k/2.6.24/143-ioext.diff | 1351 - .../bugfix/m68k/2.6.24/149-mc68681.diff | 145 - .../patches/bugfix/m68k/2.6.24/152-pci.diff | 20 - .../patches/bugfix/m68k/2.6.24/448-ide.diff | 22 - .../bugfix/m68k/2.6.24/478-serial.diff | 29 - .../bugfix/m68k/2.6.24/633-atari_scc.diff | 1739 - .../bugfix/m68k/2.6.24/amiga-debug=mem.diff | 74 - .../m68k/2.6.24/amiga-platform-device.diff | 191 - .../m68k/2.6.24/amiga-platform-device2.diff | 32 - .../bugfix/m68k/2.6.24/atari-aranym.diff | 534 - .../bugfix/m68k/2.6.24/atari-ethernat.diff | 2581 - .../bugfix/m68k/2.6.24/atari-ethernec.diff | 1063 - .../m68k/2.6.24/atari-platform-device.diff | 73 - .../bugfix/m68k/2.6.24/atari-rom-isa.diff | 401 - .../m68k/2.6.24/b43-depends-on-HAS_DMA.diff | 32 - .../blinux-list-is-subscribers-only.diff | 15 - .../2.6.24/checkpatch-print-filenames.diff | 28 - .../2.6.24/falconide_intr_lock-ratelimit.diff | 22 - .../bugfix/m68k/2.6.24/m68k-initrd-fix.diff | 52 - .../m68k-q40ints.c-needs-asm-floppy.h.diff | 22 - .../m68k-replace-linux-68k-by-linux-m68k.diff | 244 - .../m68k/2.6.24/m68k-scsi-HOST_C-cleanup.diff | 22 - .../m68k/2.6.24/mac-platform-device.diff | 20 - .../patches/bugfix/m68k/2.6.24/series-extra | 50 - .../2.6.24/zorro-module-device-table.diff | 238 - .../drivers-media-video-dabus-license.patch | 34 - 
.../dfsg/drivers-usb-misc-emi62-emi26.patch | 10 +- .../drivers-usb-serial-keyspan-remove.patch | 10 +- debian/patches/series/1~experimental.1 | 3 +- debian/patches/series/1~experimental.1-extra | 33 - debian/patches/series/orig-0 | 1 - 36 files changed, 12 insertions(+), 1274862 deletions(-) delete mode 100644 debian/patches/bugfix/all/patch-2.6.25-git18 delete mode 100644 debian/patches/bugfix/m68k/2.6.24/130-adbraw.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/134-atari-fat.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/141-ide.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/143-ioext.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/149-mc68681.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/152-pci.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/448-ide.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/478-serial.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/633-atari_scc.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/amiga-debug=mem.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/amiga-platform-device.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/amiga-platform-device2.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/atari-aranym.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/atari-ethernat.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/atari-ethernec.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/atari-platform-device.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/atari-rom-isa.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/b43-depends-on-HAS_DMA.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/blinux-list-is-subscribers-only.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/checkpatch-print-filenames.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/falconide_intr_lock-ratelimit.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/m68k-initrd-fix.diff 
delete mode 100644 debian/patches/bugfix/m68k/2.6.24/m68k-q40ints.c-needs-asm-floppy.h.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/m68k-replace-linux-68k-by-linux-m68k.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/m68k-scsi-HOST_C-cleanup.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/mac-platform-device.diff delete mode 100644 debian/patches/bugfix/m68k/2.6.24/series-extra delete mode 100644 debian/patches/bugfix/m68k/2.6.24/zorro-module-device-table.diff delete mode 100644 debian/patches/debian/dfsg/drivers-media-video-dabus-license.patch diff --git a/debian/changelog b/debian/changelog index e1eb32a77..121d895c6 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -linux-2.6 (2.6.25-1~git.2) UNRELEASED; urgency=low +linux-2.6 (2.6.26~rc1-1~experimental.1) UNRELEASED; urgency=low [ maximilian attems ] * topconfig set CRYPTO_CTS, SND_PCSP, SND_AW2, IWL4965_LEDS, IWL3945_LEDS, diff --git a/debian/patches/bugfix/all/patch-2.6.25-git18 b/debian/patches/bugfix/all/patch-2.6.25-git18 deleted file mode 100644 index 1b41aa9c5..000000000 --- a/debian/patches/bugfix/all/patch-2.6.25-git18 +++ /dev/null @@ -1,1265601 +0,0 @@ -diff --git a/.gitignore b/.gitignore -index fdcce40..090b293 100644 ---- a/.gitignore -+++ b/.gitignore -@@ -27,6 +27,7 @@ TAGS - vmlinux* - !vmlinux.lds.S - System.map -+Module.markers - Module.symvers - !.gitignore - -@@ -40,6 +41,7 @@ include/linux/autoconf.h - include/linux/compile.h - include/linux/version.h - include/linux/utsrelease.h -+include/linux/bounds.h - - # stgit generated dirs - patches-* -diff --git a/.mailmap b/.mailmap -index ebf9bf8..7260842 100644 ---- a/.mailmap -+++ b/.mailmap -@@ -88,6 +88,7 @@ Rudolf Marek - Rui Saraiva - Sachin P Sant - Sam Ravnborg -+S.Çağlar Onur - Simon Kelley - Stéphane Witzmann - Stephen Hemminger -diff --git a/CREDITS b/CREDITS -index da0a56e..8fec7b3 100644 ---- a/CREDITS -+++ b/CREDITS -@@ -403,6 +403,8 @@ D: Linux CD and Support Giveaway List - N: 
Erik Inge Bolsø - E: knan@mo.himolde.no - D: Misc kernel hacks -+D: Updated PC speaker driver for 2.3 -+S: Norway - - N: Andreas E. Bombe - E: andreas.bombe@munich.netsurf.de -@@ -3116,6 +3118,12 @@ S: Post Office Box 64132 - S: Sunnyvale, California 94088-4132 - S: USA - -+N: Stas Sergeev -+E: stsp@users.sourceforge.net -+D: PCM PC-Speaker driver -+D: misc fixes -+S: Russia -+ - N: Simon Shapiro - E: shimon@i-Connect.Net - W: http://www.-i-Connect.Net/~shimon -diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX -index e8fb246..1977fab 100644 ---- a/Documentation/00-INDEX -+++ b/Documentation/00-INDEX -@@ -25,8 +25,6 @@ DMA-API.txt - - DMA API, pci_ API & extensions for non-consistent memory machines. - DMA-ISA-LPC.txt - - How to do DMA with ISA (and LPC) devices. --DMA-mapping.txt -- - info for PCI drivers using DMA portably across all platforms. - DocBook/ - - directory with DocBook templates etc. for kernel documentation. - HOWTO -@@ -43,8 +41,6 @@ ManagementStyle - - how to (attempt to) manage kernel hackers. - MSI-HOWTO.txt - - the Message Signaled Interrupts (MSI) Driver Guide HOWTO and FAQ. --PCIEBUS-HOWTO.txt -- - a guide describing the PCI Express Port Bus driver. - RCU/ - - directory with info on RCU (read-copy update). - README.DAC960 -@@ -167,10 +163,8 @@ highuid.txt - - notes on the change from 16 bit to 32 bit user/group IDs. - hpet.txt - - High Precision Event Timer Driver for Linux. --hrtimer/ -- - info on the timer_stats debugging facility for timer (ab)use. --hrtimers/ -- - info on the hrtimers subsystem for high-resolution kernel timers. -+timers/ -+ - info on the timer related topics - hw_random.txt - - info on Linux support for random number generator in i8xx chipsets. - hwmon/ -@@ -287,12 +281,6 @@ parport.txt - - how to use the parallel-port driver. - parport-lowlevel.txt - - description and usage of the low level parallel port functions. --pci-error-recovery.txt -- - info on PCI error recovery. 
--pci.txt -- - info on the PCI subsystem for device driver authors. --pcieaer-howto.txt -- - the PCI Express Advanced Error Reporting Driver Guide HOWTO. - pcmcia/ - - info on the Linux PCMCIA driver. - pi-futex.txt -@@ -341,8 +329,6 @@ sgi-visws.txt - - short blurb on the SGI Visual Workstations. - sh/ - - directory with info on porting Linux to a new architecture. --smart-config.txt -- - description of the Smart Config makefile feature. - sound/ - - directory with info on sound card support. - sparc/ -diff --git a/Documentation/ABI/obsolete/o2cb b/Documentation/ABI/obsolete/o2cb -new file mode 100644 -index 0000000..9c49d8e ---- /dev/null -+++ b/Documentation/ABI/obsolete/o2cb -@@ -0,0 +1,11 @@ -+What: /sys/o2cb symlink -+Date: Dec 2005 -+KernelVersion: 2.6.16 -+Contact: ocfs2-devel@oss.oracle.com -+Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink will -+ be removed when new versions of ocfs2-tools which know to look -+ in /sys/fs/o2cb are sufficiently prevalent. Don't code new -+ software to look here, it should try /sys/fs/o2cb instead. -+ See Documentation/ABI/stable/o2cb for more information on usage. -+Users: ocfs2-tools. It's sufficient to mail proposed changes to -+ ocfs2-devel@oss.oracle.com. -diff --git a/Documentation/ABI/stable/o2cb b/Documentation/ABI/stable/o2cb -new file mode 100644 -index 0000000..5eb1545 ---- /dev/null -+++ b/Documentation/ABI/stable/o2cb -@@ -0,0 +1,10 @@ -+What: /sys/fs/o2cb/ (was /sys/o2cb) -+Date: Dec 2005 -+KernelVersion: 2.6.16 -+Contact: ocfs2-devel@oss.oracle.com -+Description: Ocfs2-tools looks at 'interface-revision' for versioning -+ information. Each logmask/ file controls a set of debug prints -+ and can be written into with the strings "allow", "deny", or -+ "off". Reading the file returns the current state. -+Users: ocfs2-tools. It's sufficient to mail proposed changes to -+ ocfs2-devel@oss.oracle.com. 
-diff --git a/Documentation/ABI/stable/sysfs-class-ubi b/Documentation/ABI/stable/sysfs-class-ubi -new file mode 100644 -index 0000000..18d471d ---- /dev/null -+++ b/Documentation/ABI/stable/sysfs-class-ubi -@@ -0,0 +1,212 @@ -+What: /sys/class/ubi/ -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ The ubi/ class sub-directory belongs to the UBI subsystem and -+ provides general UBI information, per-UBI device information -+ and per-UBI volume information. -+ -+What: /sys/class/ubi/version -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ This file contains version of the latest supported UBI on-media -+ format. Currently it is 1, and there is no plan to change this. -+ However, if in the future UBI needs on-flash format changes -+ which cannot be done in a compatible manner, a new format -+ version will be added. So this is a mechanism for possible -+ future backward-compatible (but forward-incompatible) -+ improvements. -+ -+What: /sys/class/ubiX/ -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ The /sys/class/ubi0, /sys/class/ubi1, etc directories describe -+ UBI devices (UBI device 0, 1, etc). They contain general UBI -+ device information and per UBI volume information (each UBI -+ device may have many UBI volumes) -+ -+What: /sys/class/ubi/ubiX/avail_eraseblocks -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ Amount of available logical eraseblock. For example, one may -+ create a new UBI volume which has this amount of logical -+ eraseblocks. -+ -+What: /sys/class/ubi/ubiX/bad_peb_count -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ Count of bad physical eraseblocks on the underlying MTD device. 
-+ -+What: /sys/class/ubi/ubiX/bgt_enabled -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ Contains ASCII "0\n" if the UBI background thread is disabled, -+ and ASCII "1\n" if it is enabled. -+ -+What: /sys/class/ubi/ubiX/dev -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ Major and minor numbers of the character device corresponding -+ to this UBI device (in : format). -+ -+What: /sys/class/ubi/ubiX/eraseblock_size -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ Maximum logical eraseblock size this UBI device may provide. UBI -+ volumes may have smaller logical eraseblock size because of their -+ alignment. -+ -+What: /sys/class/ubi/ubiX/max_ec -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ Maximum physical eraseblock erase counter value. -+ -+What: /sys/class/ubi/ubiX/max_vol_count -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ Maximum number of volumes which this UBI device may have. -+ -+What: /sys/class/ubi/ubiX/min_io_size -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ Minimum input/output unit size. All the I/O may only be done -+ in fractions of the contained number. -+ -+What: /sys/class/ubi/ubiX/mtd_num -+Date: January 2008 -+KernelVersion: 2.6.25 -+Contact: Artem Bityutskiy -+Description: -+ Number of the underlying MTD device. -+ -+What: /sys/class/ubi/ubiX/reserved_for_bad -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ Number of physical eraseblocks reserved for bad block handling. -+ -+What: /sys/class/ubi/ubiX/total_eraseblocks -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ Total number of good (not marked as bad) physical eraseblocks on -+ the underlying MTD device. 
-+ -+What: /sys/class/ubi/ubiX/volumes_count -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ Count of volumes on this UBI device. -+ -+What: /sys/class/ubi/ubiX/ubiX_Y/ -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ The /sys/class/ubi/ubiX/ubiX_0/, /sys/class/ubi/ubiX/ubiX_1/, -+ etc directories describe UBI volumes on UBI device X (volumes -+ 0, 1, etc). -+ -+What: /sys/class/ubi/ubiX/ubiX_Y/alignment -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ Volume alignment - the value the logical eraseblock size of -+ this volume has to be aligned on. For example, 2048 means that -+ logical eraseblock size is multiple of 2048. In other words, -+ volume logical eraseblock size is UBI device logical eraseblock -+ size aligned to the alignment value. -+ -+What: /sys/class/ubi/ubiX/ubiX_Y/corrupted -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ Contains ASCII "0\n" if the UBI volume is OK, and ASCII "1\n" -+ if it is corrupted (e.g., due to an interrupted volume update). -+ -+What: /sys/class/ubi/ubiX/ubiX_Y/data_bytes -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ The amount of data this volume contains. This value makes sense -+ only for static volumes, and for dynamic volume it equivalent -+ to the total volume size in bytes. -+ -+What: /sys/class/ubi/ubiX/ubiX_Y/dev -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ Major and minor numbers of the character device corresponding -+ to this UBI volume (in : format). -+ -+What: /sys/class/ubi/ubiX/ubiX_Y/name -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ Volume name. 
-+ -+What: /sys/class/ubi/ubiX/ubiX_Y/reserved_ebs -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ Count of physical eraseblock reserved for this volume. -+ Equivalent to the volume size in logical eraseblocks. -+ -+What: /sys/class/ubi/ubiX/ubiX_Y/type -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ Volume type. Contains ASCII "dynamic\n" for dynamic volumes and -+ "static\n" for static volumes. -+ -+What: /sys/class/ubi/ubiX/ubiX_Y/upd_marker -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ Contains ASCII "0\n" if the update marker is not set for this -+ volume, and "1\n" if it is set. The update marker is set when -+ volume update starts, and cleaned when it ends. So the presence -+ of the update marker indicates that the volume is being updated -+ at the moment of the update was interrupted. The later may be -+ checked using the "corrupted" sysfs file. -+ -+What: /sys/class/ubi/ubiX/ubiX_Y/usable_eb_size -+Date: July 2006 -+KernelVersion: 2.6.22 -+Contact: Artem Bityutskiy -+Description: -+ Logical eraseblock size of this volume. Equivalent to logical -+ eraseblock size of the device aligned on the volume alignment -+ value. -diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci -new file mode 100644 -index 0000000..ceddcff ---- /dev/null -+++ b/Documentation/ABI/testing/sysfs-bus-pci -@@ -0,0 +1,11 @@ -+What: /sys/bus/pci/devices/.../vpd -+Date: February 2008 -+Contact: Ben Hutchings -+Description: -+ A file named vpd in a device directory will be a -+ binary file containing the Vital Product Data for the -+ device. It should follow the VPD format defined in -+ PCI Specification 2.1 or 2.2, but users should consider -+ that some devices may have malformatted data. If the -+ underlying VPD has a writable section then the -+ corresponding section of this file will be writable. 
-diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi -new file mode 100644 -index 0000000..5ac1e01 ---- /dev/null -+++ b/Documentation/ABI/testing/sysfs-class-bdi -@@ -0,0 +1,46 @@ -+What: /sys/class/bdi// -+Date: January 2008 -+Contact: Peter Zijlstra -+Description: -+ -+Provide a place in sysfs for the backing_dev_info object. This allows -+setting and retrieving various BDI specific variables. -+ -+The identifier can be either of the following: -+ -+MAJOR:MINOR -+ -+ Device number for block devices, or value of st_dev on -+ non-block filesystems which provide their own BDI, such as NFS -+ and FUSE. -+ -+default -+ -+ The default backing dev, used for non-block device backed -+ filesystems which do not provide their own BDI. -+ -+Files under /sys/class/bdi// -+--------------------------------- -+ -+read_ahead_kb (read-write) -+ -+ Size of the read-ahead window in kilobytes -+ -+min_ratio (read-write) -+ -+ Under normal circumstances each device is given a part of the -+ total write-back cache that relates to its current average -+ writeout speed in relation to the other devices. -+ -+ The 'min_ratio' parameter allows assigning a minimum -+ percentage of the write-back cache to a particular device. -+ For example, this is useful for providing a minimum QoS. -+ -+max_ratio (read-write) -+ -+ Allows limiting a particular device to use not more than the -+ given percentage of the write-back cache. This is useful in -+ situations where we want to avoid one device taking all or -+ most of the write-back cache. For example in case of an NFS -+ mount that is prone to get stuck, or a FUSE mount which cannot -+ be trusted to play fair. 
-diff --git a/Documentation/ABI/testing/sysfs-ibft b/Documentation/ABI/testing/sysfs-ibft -new file mode 100644 -index 0000000..c2b7d11 ---- /dev/null -+++ b/Documentation/ABI/testing/sysfs-ibft -@@ -0,0 +1,23 @@ -+What: /sys/firmware/ibft/initiator -+Date: November 2007 -+Contact: Konrad Rzeszutek -+Description: The /sys/firmware/ibft/initiator directory will contain -+ files that expose the iSCSI Boot Firmware Table initiator data. -+ Usually this contains the Initiator name. -+ -+What: /sys/firmware/ibft/targetX -+Date: November 2007 -+Contact: Konrad Rzeszutek -+Description: The /sys/firmware/ibft/targetX directory will contain -+ files that expose the iSCSI Boot Firmware Table target data. -+ Usually this contains the target's IP address, boot LUN, -+ target name, and what NIC it is associated with. It can also -+ contain the CHAP name (and password), the reverse CHAP -+ name (and password) -+ -+What: /sys/firmware/ibft/ethernetX -+Date: November 2007 -+Contact: Konrad Rzeszutek -+Description: The /sys/firmware/ibft/ethernetX directory will contain -+ files that expose the iSCSI Boot Firmware Table NIC data. -+ This can this can the IP address, MAC, and gateway of the NIC. -diff --git a/Documentation/ABI/testing/sysfs-ocfs2 b/Documentation/ABI/testing/sysfs-ocfs2 -new file mode 100644 -index 0000000..b7cc516 ---- /dev/null -+++ b/Documentation/ABI/testing/sysfs-ocfs2 -@@ -0,0 +1,89 @@ -+What: /sys/fs/ocfs2/ -+Date: April 2008 -+Contact: ocfs2-devel@oss.oracle.com -+Description: -+ The /sys/fs/ocfs2 directory contains knobs used by the -+ ocfs2-tools to interact with the filesystem. -+ -+What: /sys/fs/ocfs2/max_locking_protocol -+Date: April 2008 -+Contact: ocfs2-devel@oss.oracle.com -+Description: -+ The /sys/fs/ocfs2/max_locking_protocol file displays version -+ of ocfs2 locking supported by the filesystem. This version -+ covers how ocfs2 uses distributed locking between cluster -+ nodes. -+ -+ The protocol version has a major and minor number. 
Two -+ cluster nodes can interoperate if they have an identical -+ major number and an overlapping minor number - thus, -+ a node with version 1.10 can interoperate with a node -+ sporting version 1.8, as long as both use the 1.8 protocol. -+ -+ Reading from this file returns a single line, the major -+ number and minor number joined by a period, eg "1.10". -+ -+ This file is read-only. The value is compiled into the -+ driver. -+ -+What: /sys/fs/ocfs2/loaded_cluster_plugins -+Date: April 2008 -+Contact: ocfs2-devel@oss.oracle.com -+Description: -+ The /sys/fs/ocfs2/loaded_cluster_plugins file describes -+ the available plugins to support ocfs2 cluster operation. -+ A cluster plugin is required to use ocfs2 in a cluster. -+ There are currently two available plugins: -+ -+ * 'o2cb' - The classic o2cb cluster stack that ocfs2 has -+ used since its inception. -+ * 'user' - A plugin supporting userspace cluster software -+ in conjunction with fs/dlm. -+ -+ Reading from this file returns the names of all loaded -+ plugins, one per line. -+ -+ This file is read-only. Its contents may change as -+ plugins are loaded or removed. -+ -+What: /sys/fs/ocfs2/active_cluster_plugin -+Date: April 2008 -+Contact: ocfs2-devel@oss.oracle.com -+Description: -+ The /sys/fs/ocfs2/active_cluster_plugin displays which -+ cluster plugin is currently in use by the filesystem. -+ The active plugin will appear in the loaded_cluster_plugins -+ file as well. Only one plugin can be used at a time. -+ -+ Reading from this file returns the name of the active plugin -+ on a single line. -+ -+ This file is read-only. Which plugin is active depends on -+ the cluster stack in use. The contents may change -+ when all filesystems are unmounted and the cluster stack -+ is changed. -+ -+What: /sys/fs/ocfs2/cluster_stack -+Date: April 2008 -+Contact: ocfs2-devel@oss.oracle.com -+Description: -+ The /sys/fs/ocfs2/cluster_stack file contains the name -+ of current ocfs2 cluster stack. 
This value is set by -+ userspace tools when bringing the cluster stack online. -+ -+ Cluster stack names are 4 characters in length. -+ -+ When the 'o2cb' cluster stack is used, the 'o2cb' cluster -+ plugin is active. All other cluster stacks use the 'user' -+ cluster plugin. -+ -+ Reading from this file returns the name of the current -+ cluster stack on a single line. -+ -+ Writing a new stack name to this file changes the current -+ cluster stack unless there are mounted ocfs2 filesystems. -+ If there are mounted filesystems, attempts to change the -+ stack return an error. -+ -+Users: -+ ocfs2-tools -diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt -index b939ebb..80d1504 100644 ---- a/Documentation/DMA-API.txt -+++ b/Documentation/DMA-API.txt -@@ -145,7 +145,7 @@ Part Ic - DMA addressing limitations - int - dma_supported(struct device *dev, u64 mask) - int --pci_dma_supported(struct device *dev, u64 mask) -+pci_dma_supported(struct pci_dev *hwdev, u64 mask) - - Checks to see if the device can support DMA to the memory described by - mask. -@@ -189,7 +189,7 @@ dma_addr_t - dma_map_single(struct device *dev, void *cpu_addr, size_t size, - enum dma_data_direction direction) - dma_addr_t --pci_map_single(struct device *dev, void *cpu_addr, size_t size, -+pci_map_single(struct pci_dev *hwdev, void *cpu_addr, size_t size, - int direction) - - Maps a piece of processor virtual memory so it can be accessed by the -@@ -395,6 +395,71 @@ Notes: You must do this: - - See also dma_map_single(). 
- -+dma_addr_t -+dma_map_single_attrs(struct device *dev, void *cpu_addr, size_t size, -+ enum dma_data_direction dir, -+ struct dma_attrs *attrs) -+ -+void -+dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr, -+ size_t size, enum dma_data_direction dir, -+ struct dma_attrs *attrs) -+ -+int -+dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, -+ int nents, enum dma_data_direction dir, -+ struct dma_attrs *attrs) -+ -+void -+dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl, -+ int nents, enum dma_data_direction dir, -+ struct dma_attrs *attrs) -+ -+The four functions above are just like the counterpart functions -+without the _attrs suffixes, except that they pass an optional -+struct dma_attrs*. -+ -+struct dma_attrs encapsulates a set of "dma attributes". For the -+definition of struct dma_attrs see linux/dma-attrs.h. -+ -+The interpretation of dma attributes is architecture-specific, and -+each attribute should be documented in Documentation/DMA-attributes.txt. -+ -+If struct dma_attrs* is NULL, the semantics of each of these -+functions is identical to those of the corresponding function -+without the _attrs suffix. As a result dma_map_single_attrs() -+can generally replace dma_map_single(), etc. -+ -+As an example of the use of the *_attrs functions, here's how -+you could pass an attribute DMA_ATTR_FOO when mapping memory -+for DMA: -+ -+#include -+/* DMA_ATTR_FOO should be defined in linux/dma-attrs.h and -+ * documented in Documentation/DMA-attributes.txt */ -+... -+ -+ DEFINE_DMA_ATTRS(attrs); -+ dma_set_attr(DMA_ATTR_FOO, &attrs); -+ .... -+ n = dma_map_sg_attrs(dev, sg, nents, DMA_TO_DEVICE, &attr); -+ .... -+ -+Architectures that care about DMA_ATTR_FOO would check for its -+presence in their implementations of the mapping and unmapping -+routines, e.g.: -+ -+void whizco_dma_map_sg_attrs(struct device *dev, dma_addr_t dma_addr, -+ size_t size, enum dma_data_direction dir, -+ struct dma_attrs *attrs) -+{ -+ .... 
-+ int foo = dma_get_attr(DMA_ATTR_FOO, attrs); -+ .... -+ if (foo) -+ /* twizzle the frobnozzle */ -+ .... -+ - - Part II - Advanced dma_ usage - ----------------------------- -diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt -new file mode 100644 -index 0000000..6d772f8 ---- /dev/null -+++ b/Documentation/DMA-attributes.txt -@@ -0,0 +1,24 @@ -+ DMA attributes -+ ============== -+ -+This document describes the semantics of the DMA attributes that are -+defined in linux/dma-attrs.h. -+ -+DMA_ATTR_WRITE_BARRIER -+---------------------- -+ -+DMA_ATTR_WRITE_BARRIER is a (write) barrier attribute for DMA. DMA -+to a memory region with the DMA_ATTR_WRITE_BARRIER attribute forces -+all pending DMA writes to complete, and thus provides a mechanism to -+strictly order DMA from a device across all intervening busses and -+bridges. This barrier is not specific to a particular type of -+interconnect, it applies to the system as a whole, and so its -+implementation must account for the idiosyncracies of the system all -+the way from the DMA device to memory. -+ -+As an example of a situation where DMA_ATTR_WRITE_BARRIER would be -+useful, suppose that a device does a DMA write to indicate that data is -+ready and available in memory. The DMA of the "completion indication" -+could race with data DMA. Mapping the memory used for completion -+indications with DMA_ATTR_WRITE_BARRIER would prevent the race. -+ -diff --git a/Documentation/DMA-mapping.txt b/Documentation/DMA-mapping.txt -index d84f89d..b463ecd 100644 ---- a/Documentation/DMA-mapping.txt -+++ b/Documentation/DMA-mapping.txt -@@ -315,11 +315,11 @@ you should do: - - dma_addr_t dma_handle; - -- cpu_addr = pci_alloc_consistent(dev, size, &dma_handle); -+ cpu_addr = pci_alloc_consistent(pdev, size, &dma_handle); - --where dev is a struct pci_dev *. You should pass NULL for PCI like buses --where devices don't have struct pci_dev (like ISA, EISA). This may be --called in interrupt context. 
-+where pdev is a struct pci_dev *. This may be called in interrupt context. -+You should use dma_alloc_coherent (see DMA-API.txt) for buses -+where devices don't have struct pci_dev (like ISA, EISA). - - This argument is needed because the DMA translations may be bus - specific (and often is private to the bus which the device is attached -@@ -332,7 +332,7 @@ __get_free_pages (but takes size instead of a page order). If your - driver needs regions sized smaller than a page, you may prefer using - the pci_pool interface, described below. - --The consistent DMA mapping interfaces, for non-NULL dev, will by -+The consistent DMA mapping interfaces, for non-NULL pdev, will by - default return a DMA address which is SAC (Single Address Cycle) - addressable. Even if the device indicates (via PCI dma mask) that it - may address the upper 32-bits and thus perform DAC cycles, consistent -@@ -354,9 +354,9 @@ buffer you receive will not cross a 64K boundary. - - To unmap and free such a DMA region, you call: - -- pci_free_consistent(dev, size, cpu_addr, dma_handle); -+ pci_free_consistent(pdev, size, cpu_addr, dma_handle); - --where dev, size are the same as in the above call and cpu_addr and -+where pdev, size are the same as in the above call and cpu_addr and - dma_handle are the values pci_alloc_consistent returned to you. - This function may not be called in interrupt context. - -@@ -371,9 +371,9 @@ Create a pci_pool like this: - - struct pci_pool *pool; - -- pool = pci_pool_create(name, dev, size, align, alloc); -+ pool = pci_pool_create(name, pdev, size, align, alloc); - --The "name" is for diagnostics (like a kmem_cache name); dev and size -+The "name" is for diagnostics (like a kmem_cache name); pdev and size - are as above. The device's hardware alignment requirement for this - type of data is "align" (which is expressed in bytes, and must be a - power of two). 
If your device has no boundary crossing restrictions, -@@ -472,11 +472,11 @@ To map a single region, you do: - void *addr = buffer->ptr; - size_t size = buffer->len; - -- dma_handle = pci_map_single(dev, addr, size, direction); -+ dma_handle = pci_map_single(pdev, addr, size, direction); - - and to unmap it: - -- pci_unmap_single(dev, dma_handle, size, direction); -+ pci_unmap_single(pdev, dma_handle, size, direction); - - You should call pci_unmap_single when the DMA activity is finished, e.g. - from the interrupt which told you that the DMA transfer is done. -@@ -493,17 +493,17 @@ Specifically: - unsigned long offset = buffer->offset; - size_t size = buffer->len; - -- dma_handle = pci_map_page(dev, page, offset, size, direction); -+ dma_handle = pci_map_page(pdev, page, offset, size, direction); - - ... - -- pci_unmap_page(dev, dma_handle, size, direction); -+ pci_unmap_page(pdev, dma_handle, size, direction); - - Here, "offset" means byte offset within the given page. - - With scatterlists, you map a region gathered from several regions by: - -- int i, count = pci_map_sg(dev, sglist, nents, direction); -+ int i, count = pci_map_sg(pdev, sglist, nents, direction); - struct scatterlist *sg; - - for_each_sg(sglist, sg, count, i) { -@@ -527,7 +527,7 @@ accessed sg->address and sg->length as shown above. - - To unmap a scatterlist, just call: - -- pci_unmap_sg(dev, sglist, nents, direction); -+ pci_unmap_sg(pdev, sglist, nents, direction); - - Again, make sure DMA activity has already finished. - -@@ -550,11 +550,11 @@ correct copy of the DMA buffer. - So, firstly, just map it with pci_map_{single,sg}, and after each DMA - transfer call either: - -- pci_dma_sync_single_for_cpu(dev, dma_handle, size, direction); -+ pci_dma_sync_single_for_cpu(pdev, dma_handle, size, direction); - - or: - -- pci_dma_sync_sg_for_cpu(dev, sglist, nents, direction); -+ pci_dma_sync_sg_for_cpu(pdev, sglist, nents, direction); - - as appropriate. 
- -@@ -562,7 +562,7 @@ Then, if you wish to let the device get at the DMA area again, - finish accessing the data with the cpu, and then before actually - giving the buffer to the hardware call either: - -- pci_dma_sync_single_for_device(dev, dma_handle, size, direction); -+ pci_dma_sync_single_for_device(pdev, dma_handle, size, direction); - - or: - -@@ -739,7 +739,7 @@ failure can be determined by: - - dma_addr_t dma_handle; - -- dma_handle = pci_map_single(dev, addr, size, direction); -+ dma_handle = pci_map_single(pdev, addr, size, direction); - if (pci_dma_mapping_error(dma_handle)) { - /* - * reduce current DMA mapping usage, -diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile -index 300e170..0eb0d02 100644 ---- a/Documentation/DocBook/Makefile -+++ b/Documentation/DocBook/Makefile -@@ -9,9 +9,10 @@ - DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \ - kernel-hacking.xml kernel-locking.xml deviceiobook.xml \ - procfs-guide.xml writing_usb_driver.xml networking.xml \ -- kernel-api.xml filesystems.xml lsm.xml usb.xml \ -+ kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \ - gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ -- genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml -+ genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \ -+ mac80211.xml debugobjects.xml - - ### - # The build process is as follows (targets): -@@ -186,8 +187,11 @@ quiet_cmd_fig2png = FIG2PNG $@ - - ### - # Rule to convert a .c file to inline XML documentation -+ gen_xml = : -+ quiet_gen_xml = echo ' GEN $@' -+silent_gen_xml = : - %.xml: %.c -- @echo ' GEN $@' -+ @$($(quiet)gen_xml) - @( \ - echo ""; \ - expand --tabs=8 < $< | \ -diff --git a/Documentation/DocBook/debugobjects.tmpl b/Documentation/DocBook/debugobjects.tmpl -new file mode 100644 -index 0000000..7f5f218 ---- /dev/null -+++ b/Documentation/DocBook/debugobjects.tmpl -@@ -0,0 +1,391 @@ -+ -+ -+ -+ -+ -+ Debug objects life time -+ -+ -+ -+ Thomas -+ Gleixner -+ -+
-+ tglx@linutronix.de -+
-+
-+
-+
-+ -+ -+ 2008 -+ Thomas Gleixner -+ -+ -+ -+ -+ This documentation is free software; you can redistribute -+ it and/or modify it under the terms of the GNU General Public -+ License version 2 as published by the Free Software Foundation. -+ -+ -+ -+ This program is distributed in the hope that it will be -+ useful, but WITHOUT ANY WARRANTY; without even the implied -+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -+ See the GNU General Public License for more details. -+ -+ -+ -+ You should have received a copy of the GNU General Public -+ License along with this program; if not, write to the Free -+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, -+ MA 02111-1307 USA -+ -+ -+ -+ For more details see the file COPYING in the source -+ distribution of Linux. -+ -+ -+
-+ -+ -+ -+ -+ Introduction -+ -+ debugobjects is a generic infrastructure to track the life time -+ of kernel objects and validate the operations on those. -+ -+ -+ debugobjects is useful to check for the following error patterns: -+ -+ Activation of uninitialized objects -+ Initialization of active objects -+ Usage of freed/destroyed objects -+ -+ -+ -+ debugobjects is not changing the data structure of the real -+ object so it can be compiled in with a minimal runtime impact -+ and enabled on demand with a kernel command line option. -+ -+ -+ -+ -+ Howto use debugobjects -+ -+ A kernel subsystem needs to provide a data structure which -+ describes the object type and add calls into the debug code at -+ appropriate places. The data structure to describe the object -+ type needs at minimum the name of the object type. Optional -+ functions can and should be provided to fixup detected problems -+ so the kernel can continue to work and the debug information can -+ be retrieved from a live system instead of hard core debugging -+ with serial consoles and stack trace transcripts from the -+ monitor. -+ -+ -+ The debug calls provided by debugobjects are: -+ -+ debug_object_init -+ debug_object_init_on_stack -+ debug_object_activate -+ debug_object_deactivate -+ debug_object_destroy -+ debug_object_free -+ -+ Each of these functions takes the address of the real object and -+ a pointer to the object type specific debug description -+ structure. -+ -+ -+ Each detected error is reported in the statistics and a limited -+ number of errors are printk'ed including a full stack trace. -+ -+ -+ The statistics are available via debugfs/debug_objects/stats. -+ They provide information about the number of warnings and the -+ number of successful fixups along with information about the -+ usage of the internal tracking objects and the state of the -+ internal tracking objects pool. 
-+ -+ -+ -+ Debug functions -+ -+ Debug object function reference -+!Elib/debugobjects.c -+ -+ -+ debug_object_init -+ -+ This function is called whenever the initialization function -+ of a real object is called. -+ -+ -+ When the real object is already tracked by debugobjects it is -+ checked, whether the object can be initialized. Initializing -+ is not allowed for active and destroyed objects. When -+ debugobjects detects an error, then it calls the fixup_init -+ function of the object type description structure if provided -+ by the caller. The fixup function can correct the problem -+ before the real initialization of the object happens. E.g. it -+ can deactivate an active object in order to prevent damage to -+ the subsystem. -+ -+ -+ When the real object is not yet tracked by debugobjects, -+ debugobjects allocates a tracker object for the real object -+ and sets the tracker object state to ODEBUG_STATE_INIT. It -+ verifies that the object is not on the callers stack. If it is -+ on the callers stack then a limited number of warnings -+ including a full stack trace is printk'ed. The calling code -+ must use debug_object_init_on_stack() and remove the object -+ before leaving the function which allocated it. See next -+ section. -+ -+ -+ -+ -+ debug_object_init_on_stack -+ -+ This function is called whenever the initialization function -+ of a real object which resides on the stack is called. -+ -+ -+ When the real object is already tracked by debugobjects it is -+ checked, whether the object can be initialized. Initializing -+ is not allowed for active and destroyed objects. When -+ debugobjects detects an error, then it calls the fixup_init -+ function of the object type description structure if provided -+ by the caller. The fixup function can correct the problem -+ before the real initialization of the object happens. E.g. it -+ can deactivate an active object in order to prevent damage to -+ the subsystem. 
-+ -+ -+ When the real object is not yet tracked by debugobjects -+ debugobjects allocates a tracker object for the real object -+ and sets the tracker object state to ODEBUG_STATE_INIT. It -+ verifies that the object is on the callers stack. -+ -+ -+ An object which is on the stack must be removed from the -+ tracker by calling debug_object_free() before the function -+ which allocates the object returns. Otherwise we keep track of -+ stale objects. -+ -+ -+ -+ -+ debug_object_activate -+ -+ This function is called whenever the activation function of a -+ real object is called. -+ -+ -+ When the real object is already tracked by debugobjects it is -+ checked, whether the object can be activated. Activating is -+ not allowed for active and destroyed objects. When -+ debugobjects detects an error, then it calls the -+ fixup_activate function of the object type description -+ structure if provided by the caller. The fixup function can -+ correct the problem before the real activation of the object -+ happens. E.g. it can deactivate an active object in order to -+ prevent damage to the subsystem. -+ -+ -+ When the real object is not yet tracked by debugobjects then -+ the fixup_activate function is called if available. This is -+ necessary to allow the legitimate activation of statically -+ allocated and initialized objects. The fixup function checks -+ whether the object is valid and calls the debug_objects_init() -+ function to initialize the tracking of this object. -+ -+ -+ When the activation is legitimate, then the state of the -+ associated tracker object is set to ODEBUG_STATE_ACTIVE. -+ -+ -+ -+ -+ debug_object_deactivate -+ -+ This function is called whenever the deactivation function of -+ a real object is called. -+ -+ -+ When the real object is tracked by debugobjects it is checked, -+ whether the object can be deactivated. Deactivating is not -+ allowed for untracked or destroyed objects. 
-+ -+ -+ When the deactivation is legitimate, then the state of the -+ associated tracker object is set to ODEBUG_STATE_INACTIVE. -+ -+ -+ -+ -+ debug_object_destroy -+ -+ This function is called to mark an object destroyed. This is -+ useful to prevent the usage of invalid objects, which are -+ still available in memory: either statically allocated objects -+ or objects which are freed later. -+ -+ -+ When the real object is tracked by debugobjects it is checked, -+ whether the object can be destroyed. Destruction is not -+ allowed for active and destroyed objects. When debugobjects -+ detects an error, then it calls the fixup_destroy function of -+ the object type description structure if provided by the -+ caller. The fixup function can correct the problem before the -+ real destruction of the object happens. E.g. it can deactivate -+ an active object in order to prevent damage to the subsystem. -+ -+ -+ When the destruction is legitimate, then the state of the -+ associated tracker object is set to ODEBUG_STATE_DESTROYED. -+ -+ -+ -+ -+ debug_object_free -+ -+ This function is called before an object is freed. -+ -+ -+ When the real object is tracked by debugobjects it is checked, -+ whether the object can be freed. Free is not allowed for -+ active objects. When debugobjects detects an error, then it -+ calls the fixup_free function of the object type description -+ structure if provided by the caller. The fixup function can -+ correct the problem before the real free of the object -+ happens. E.g. it can deactivate an active object in order to -+ prevent damage to the subsystem. -+ -+ -+ Note that debug_object_free removes the object from the -+ tracker. Later usage of the object is detected by the other -+ debug checks. -+ -+ -+ -+ -+ Fixup functions -+ -+ Debug object type description structure -+!Iinclude/linux/debugobjects.h -+ -+ -+ fixup_init -+ -+ This function is called from the debug code whenever a problem -+ in debug_object_init is detected. 
The function takes the -+ address of the object and the state which is currently -+ recorded in the tracker. -+ -+ -+ Called from debug_object_init when the object state is: -+ -+ ODEBUG_STATE_ACTIVE -+ -+ -+ -+ The function returns 1 when the fixup was successful, -+ otherwise 0. The return value is used to update the -+ statistics. -+ -+ -+ Note, that the function needs to call the debug_object_init() -+ function again, after the damage has been repaired in order to -+ keep the state consistent. -+ -+ -+ -+ -+ fixup_activate -+ -+ This function is called from the debug code whenever a problem -+ in debug_object_activate is detected. -+ -+ -+ Called from debug_object_activate when the object state is: -+ -+ ODEBUG_STATE_NOTAVAILABLE -+ ODEBUG_STATE_ACTIVE -+ -+ -+ -+ The function returns 1 when the fixup was successful, -+ otherwise 0. The return value is used to update the -+ statistics. -+ -+ -+ Note that the function needs to call the debug_object_activate() -+ function again after the damage has been repaired in order to -+ keep the state consistent. -+ -+ -+ The activation of statically initialized objects is a special -+ case. When debug_object_activate() has no tracked object for -+ this object address then fixup_activate() is called with -+ object state ODEBUG_STATE_NOTAVAILABLE. The fixup function -+ needs to check whether this is a legitimate case of a -+ statically initialized object or not. In case it is it calls -+ debug_object_init() and debug_object_activate() to make the -+ object known to the tracker and marked active. In this case -+ the function should return 0 because this is not a real fixup. -+ -+ -+ -+ -+ fixup_destroy -+ -+ This function is called from the debug code whenever a problem -+ in debug_object_destroy is detected. -+ -+ -+ Called from debug_object_destroy when the object state is: -+ -+ ODEBUG_STATE_ACTIVE -+ -+ -+ -+ The function returns 1 when the fixup was successful, -+ otherwise 0. 
The return value is used to update the -+ statistics. -+ -+ -+ -+ fixup_free -+ -+ This function is called from the debug code whenever a problem -+ in debug_object_free is detected. Further it can be called -+ from the debug checks in kfree/vfree, when an active object is -+ detected from the debug_check_no_obj_freed() sanity checks. -+ -+ -+ Called from debug_object_free() or debug_check_no_obj_freed() -+ when the object state is: -+ -+ ODEBUG_STATE_ACTIVE -+ -+ -+ -+ The function returns 1 when the fixup was successful, -+ otherwise 0. The return value is used to update the -+ statistics. -+ -+ -+ -+ -+ Known Bugs And Assumptions -+ -+ None (knock on wood). -+ -+ -+
-diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl -index dc0f30c..b7b1482 100644 ---- a/Documentation/DocBook/kernel-api.tmpl -+++ b/Documentation/DocBook/kernel-api.tmpl -@@ -119,7 +119,7 @@ X!Ilib/string.c - !Elib/string.c - - Bit Operations --!Iinclude/asm-x86/bitops_32.h -+!Iinclude/asm-x86/bitops.h - - - -@@ -297,11 +297,6 @@ X!Earch/x86/kernel/mca_32.c - !Ikernel/acct.c - - -- -- Power Management --!Ekernel/power/pm.c -- -- - - Device drivers infrastructure - Device Drivers Base -@@ -650,4 +645,58 @@ X!Idrivers/video/console/fonts.c - !Edrivers/i2c/i2c-core.c - - -+ -+ Clock Framework -+ -+ -+ The clock framework defines programming interfaces to support -+ software management of the system clock tree. -+ This framework is widely used with System-On-Chip (SOC) platforms -+ to support power management and various devices which may need -+ custom clock rates. -+ Note that these "clocks" don't relate to timekeeping or real -+ time clocks (RTCs), each of which have separate frameworks. -+ These struct clk instances may be used -+ to manage for example a 96 MHz signal that is used to shift bits -+ into and out of peripherals or busses, or otherwise trigger -+ synchronous state machine transitions in system hardware. -+ -+ -+ -+ Power management is supported by explicit software clock gating: -+ unused clocks are disabled, so the system doesn't waste power -+ changing the state of transistors that aren't in active use. -+ On some systems this may be backed by hardware clock gating, -+ where clocks are gated without being disabled in software. -+ Sections of chips that are powered but not clocked may be able -+ to retain their last state. -+ This low power state is often called a retention -+ mode. -+ This mode still incurs leakage currents, especially with finer -+ circuit geometries, but for CMOS circuits power is mostly used -+ by clocked state changes. 
-+ -+ -+ -+ Power-aware drivers only enable their clocks when the device -+ they manage is in active use. Also, system sleep states often -+ differ according to which clock domains are active: while a -+ "standby" state may allow wakeup from several active domains, a -+ "mem" (suspend-to-RAM) state may require a more wholesale shutdown -+ of clocks derived from higher speed PLLs and oscillators, limiting -+ the number of possible wakeup event sources. A driver's suspend -+ method may need to be aware of system-specific clock constraints -+ on the target sleep state. -+ -+ -+ -+ Some platforms support programmable clock generators. These -+ can be used by external chips of various kinds, such as other -+ CPUs, multimedia codecs, and devices with strict requirements -+ for interface clocking. -+ -+ -+!Iinclude/linux/clk.h -+ -+ - -diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl -index 2e9d6b4..77c42f4 100644 ---- a/Documentation/DocBook/kernel-locking.tmpl -+++ b/Documentation/DocBook/kernel-locking.tmpl -@@ -241,7 +241,7 @@ - - - The third type is a semaphore -- (include/asm/semaphore.h): it -+ (include/linux/semaphore.h): it - can have more than one holder at any time (the number decided at - initialization time), although it is most commonly used as a - single-holder lock (a mutex). If you can't get a semaphore, your -@@ -290,7 +290,7 @@ - - If you have a data structure which is only ever accessed from - user context, then you can use a simple semaphore -- (linux/asm/semaphore.h) to protect it. This -+ (linux/linux/semaphore.h) to protect it. 
This - is the most trivial case: you initialize the semaphore to the number - of resources available (usually 1), and call - down_interruptible() to grab the semaphore, and -@@ -854,7 +854,7 @@ The change is shown below, in standard patch format: the - }; - - -static DEFINE_MUTEX(cache_lock); --+static spinlock_t cache_lock = SPIN_LOCK_UNLOCKED; -++static DEFINE_SPINLOCK(cache_lock); - static LIST_HEAD(cache); - static unsigned int cache_num = 0; - #define MAX_CACHE_SIZE 10 -@@ -1238,7 +1238,7 @@ Here is the "lock-per-object" implementation: - - int popularity; - }; - -- static spinlock_t cache_lock = SPIN_LOCK_UNLOCKED; -+ static DEFINE_SPINLOCK(cache_lock); - @@ -77,6 +84,7 @@ - obj->id = id; - obj->popularity = 0; -@@ -1656,7 +1656,7 @@ the amount of locking which needs to be done. - #include <linux/slab.h> - #include <linux/string.h> - +#include <linux/rcupdate.h> -- #include <asm/semaphore.h> -+ #include <linux/semaphore.h> - #include <asm/errno.h> - - struct object -diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl -new file mode 100644 -index 0000000..97618be ---- /dev/null -+++ b/Documentation/DocBook/kgdb.tmpl -@@ -0,0 +1,447 @@ -+ -+ -+ -+ -+ -+ Using kgdb and the kgdb Internals -+ -+ -+ -+ Jason -+ Wessel -+ -+
-+ jason.wessel@windriver.com -+
-+
-+
-+
-+ -+ -+ -+ Tom -+ Rini -+ -+
-+ trini@kernel.crashing.org -+
-+
-+
-+
-+ -+ -+ -+ Amit S. -+ Kale -+ -+
-+ amitkale@linsyssoft.com -+
-+
-+
-+
-+ -+ -+ 2008 -+ Wind River Systems, Inc. -+ -+ -+ 2004-2005 -+ MontaVista Software, Inc. -+ -+ -+ 2004 -+ Amit S. Kale -+ -+ -+ -+ -+ This file is licensed under the terms of the GNU General Public License -+ version 2. This program is licensed "as is" without any warranty of any -+ kind, whether express or implied. -+ -+ -+ -+
-+ -+ -+ -+ Introduction -+ -+ kgdb is a source level debugger for the linux kernel. It is used along -+ with gdb to debug a linux kernel. The expectation is that gdb can -+ be used to "break in" to the kernel to inspect memory, variables -+ and look through call stack information similar to what an -+ application developer would use gdb for. It is possible to place -+ breakpoints in kernel code and perform some limited execution -+ stepping. -+ -+ -+ Two machines are required for using kgdb. One of these machines is a -+ development machine and the other is a test machine. The kernel -+ to be debugged runs on the test machine. The development machine -+ runs an instance of gdb against the vmlinux file which contains -+ the symbols (not boot image such as bzImage, zImage, uImage...). -+ In gdb the developer specifies the connection parameters and -+ connects to kgdb. Depending on which kgdb I/O modules exist in -+ the kernel for a given architecture, it may be possible to debug -+ the test machine's kernel with the development machine using a -+ rs232 or ethernet connection. -+ -+ -+ -+ Compiling a kernel -+ -+ To enable CONFIG_KGDB, look under the "Kernel debugging" -+ and then select "KGDB: kernel debugging with remote gdb". -+ -+ -+ Next you should choose one or more I/O drivers to interconnect debugging -+ host and debugged target. Early boot debugging requires a KGDB -+ I/O driver that supports early debugging and the driver must be -+ built into the kernel directly. Kgdb I/O driver configuration -+ takes place via kernel or module parameters, see following -+ chapter. -+ -+ -+ The kgdb test compile options are described in the kgdb test suite chapter. -+ -+ -+ -+ -+ Enable kgdb for debugging -+ -+ In order to use kgdb you must activate it by passing configuration -+ information to one of the kgdb I/O drivers. If you do not pass any -+ configuration information kgdb will not do anything at all.
Kgdb -+ will only actively hook up to the kernel trap hooks if a kgdb I/O -+ driver is loaded and configured. If you unconfigure a kgdb I/O -+ driver, kgdb will unregister all the kernel hook points. -+ -+ -+ All drivers can be reconfigured at run time, if -+ CONFIG_SYSFS and CONFIG_MODULES -+ are enabled, by echo'ing a new config string to -+ /sys/module/<driver>/parameters/<option>. -+ The driver can be unconfigured by passing an empty string. You cannot -+ change the configuration while the debugger is attached. Make sure -+ to detach the debugger with the detach command -+ prior to trying to unconfigure a kgdb I/O driver. -+ -+ -+ Kernel parameter: kgdbwait -+ -+ The Kernel command line option kgdbwait makes -+ kgdb wait for a debugger connection during booting of a kernel. You -+ can only use this option if you compiled a kgdb I/O driver into the -+ kernel and you specified the I/O driver configuration as a kernel -+ command line option. The kgdbwait parameter should always follow the -+ configuration parameter for the kgdb I/O driver in the kernel -+ command line else the I/O driver will not be configured prior to -+ asking the kernel to use it to wait. -+ -+ -+ The kernel will stop and wait as early as the I/O driver and -+ architecture will allow when you use this option. If you build the -+ kgdb I/O driver as a kernel module kgdbwait will not do anything. -+ -+ -+ -+ Kernel parameter: kgdboc -+ -+ The kgdboc driver was originally an abbreviation meant to stand for -+ "kgdb over console". Kgdboc is designed to work with a single -+ serial port. It was meant to cover the circumstance -+ where you wanted to use a serial console as your primary console as -+ well as using it to perform kernel debugging. Of course you can -+ also use kgdboc without assigning a console to the same port. -+ -+ -+ Using kgdboc -+ -+ You can configure kgdboc via sysfs or a module or kernel boot line -+ parameter depending on if you build with CONFIG_KGDBOC as a module -+ or built-in.
-+ -+ From the module load or built-in -+ kgdboc=<tty-device>,[baud] -+ -+ The example here would be if your console port was typically ttyS0, you would use something like kgdboc=ttyS0,115200 or on the ARM Versatile AB you would likely use kgdboc=ttyAMA0,115200 -+ -+ -+ From sysfs -+ echo ttyS0 > /sys/module/kgdboc/parameters/kgdboc -+ -+ -+ -+ -+ NOTE: Kgdboc does not support interrupting the target via the -+ gdb remote protocol. You must manually send a sysrq-g unless you -+ have a proxy that splits console output to a terminal program and -+ has a separate port for the debugger to connect to that sends the -+ sysrq-g for you. -+ -+ When using kgdboc with no debugger proxy, you can end up -+ connecting the debugger at one of two entry points. If an -+ exception occurs after you have loaded kgdboc, a message should print -+ on the console stating it is waiting for the debugger. In this case you -+ disconnect your terminal program and then connect the debugger in -+ its place. If you want to interrupt the target system and forcibly -+ enter a debug session you have to issue a Sysrq sequence and then -+ type the letter g. Then you disconnect the -+ terminal session and connect gdb. Your options if you don't like -+ this are to hack gdb to send the sysrq-g for you as well as on the -+ initial connect, or to use a debugger proxy that allows an -+ unmodified gdb to do the debugging. -+ -+ -+ -+ -+ Kernel parameter: kgdbcon -+ -+ Kgdb supports using the gdb serial protocol to send console messages -+ to the debugger when the debugger is connected and running. There -+ are two ways to activate this feature. -+ -+ Activate with the kernel command line option: -+ kgdbcon -+ -+ Use sysfs before configuring an io driver -+ -+ echo 1 > /sys/module/kgdb/parameters/kgdb_use_con -+ -+ -+ NOTE: If you do this after you configure the kgdb I/O driver, the -+ setting will not take effect until the next point the I/O is -+ reconfigured.
-+ -+ -+ -+ -+ -+ IMPORTANT NOTE: Using this option with kgdb over the console -+ (kgdboc) or kgdb over ethernet (kgdboe) is not supported. -+ -+ -+ -+ -+ Connecting gdb -+ -+ If you are using kgdboc, you need to have used kgdbwait as a boot -+ argument, issued a sysrq-g, or the system you are going to debug -+ has already taken an exception and is waiting for the debugger to -+ attach before you can connect gdb. -+ -+ -+ If you are using a kgdb I/O driver other than kgdboc, -+ you should be able to connect and the target will automatically -+ respond. -+ -+ -+ Example (using a serial port): -+ -+ -+ % gdb ./vmlinux -+ (gdb) set remotebaud 115200 -+ (gdb) target remote /dev/ttyS0 -+ -+ -+ Example (kgdb to a terminal server): -+ -+ -+ % gdb ./vmlinux -+ (gdb) target remote udp:192.168.2.2:6443 -+ -+ -+ Example (kgdb over ethernet): -+ -+ -+ % gdb ./vmlinux -+ (gdb) target remote udp:192.168.2.2:6443 -+ -+ -+ Once connected, you can debug a kernel the way you would debug an -+ application program. -+ -+ -+ If you are having problems connecting or something is going -+ seriously wrong while debugging, it will most often be the case -+ that you want to enable gdb to be verbose about its target -+ communications. You do this prior to issuing the target -+ remote command by typing in: set debug remote 1 -+ -+ -+ -+ kgdb Test Suite -+ -+ When kgdb is enabled in the kernel config you can also elect to -+ enable the config parameter KGDB_TESTS. Turning this on will -+ enable a special kgdb I/O module which is designed to test the -+ kgdb internal functions. -+ -+ -+ The kgdb tests are mainly intended for developers to test the kgdb -+ internals as well as a tool for developing a new kgdb architecture -+ specific implementation. These tests are not really for end users -+ of the Linux kernel. The primary source of documentation would be -+ to look in the drivers/misc/kgdbts.c file.
-+ -+ -+ The kgdb test suite can also be configured at compile time to run -+ the core set of tests by setting the kernel config parameter -+ KGDB_TESTS_ON_BOOT. This particular option is aimed at automated -+ regression testing and does not require modifying the kernel boot -+ config arguments. If this is turned on, the kgdb test suite can -+ be disabled by specifying "kgdbts=" as a kernel boot argument. -+ -+ -+ -+ KGDB Internals -+ -+ Architecture Specifics -+ -+ Kgdb is organized into three basic components: -+ -+ kgdb core -+ -+ The kgdb core is found in kernel/kgdb.c. It contains: -+ -+ All the logic to implement the gdb serial protocol -+ A generic OS exception handler which includes sync'ing the processors into a stopped state on a multi cpu system. -+ The API to talk to the kgdb I/O drivers -+ The API to make calls to the arch specific kgdb implementation -+ The logic to perform safe memory reads and writes to memory while using the debugger -+ A full implementation for software breakpoints unless overridden by the arch -+ -+ -+ -+ kgdb arch specific implementation -+ -+ This implementation is generally found in arch/*/kernel/kgdb.c. -+ As an example, arch/x86/kernel/kgdb.c contains the specifics to -+ implement HW breakpoint as well as the initialization to -+ dynamically register and unregister for the trap handlers on -+ this architecture.
The arch specific portion implements: -+ -+ contains an arch specific trap catcher which -+ invokes kgdb_handle_exception() to start kgdb about doing its -+ work -+ translation to and from gdb specific packet format to pt_regs -+ Registration and unregistration of architecture specific trap hooks -+ Any special exception handling and cleanup -+ NMI exception handling and cleanup -+ (optional) HW breakpoints -+ -+ -+ -+ kgdb I/O driver -+ -+ Each kgdb I/O driver has to provide an implementation for the following: -+ -+ configuration via builtin or module -+ dynamic configuration and kgdb hook registration calls -+ read and write character interface -+ A cleanup handler for unconfiguring from the kgdb core -+ (optional) Early debug methodology -+ -+ Any given kgdb I/O driver has to operate very closely with the -+ hardware and must do it in such a way that does not enable -+ interrupts or change other parts of the system context without -+ completely restoring them. The kgdb core will repeatedly "poll" -+ a kgdb I/O driver for characters when it needs input. The I/O -+ driver is expected to return immediately if there is no data -+ available. Doing so allows for the future possibility to touch -+ watch dog hardware in such a way as to have a target system not -+ reset when these are enabled. -+ -+ -+ -+ -+ -+ If you are intent on adding kgdb architecture specific support -+ for a new architecture, the architecture should define -+ HAVE_ARCH_KGDB in the architecture specific -+ Kconfig file. This will enable kgdb for the architecture, and -+ at that point you must create an architecture specific kgdb -+ implementation. -+ -+ -+ There are a few flags which must be set on every architecture in -+ their <asm/kgdb.h> file. These are: -+ -+ -+ -+ NUMREGBYTES: The size in bytes of all of the registers, so -+ that we can ensure they will all fit into a packet. -+ -+ -+ BUFMAX: The size in bytes of the buffer GDB will read into. -+ This must be larger than NUMREGBYTES.
-+ -+ -+ CACHE_FLUSH_IS_SAFE: Set to 1 if it is always safe to call -+ flush_cache_range or flush_icache_range. On some architectures, -+ these functions may not be safe to call on SMP since we keep other -+ CPUs in a holding pattern. -+ -+ -+ -+ -+ -+ There are also the following functions for the common backend, -+ found in kernel/kgdb.c, that must be supplied by the -+ architecture-specific backend unless marked as (optional), in -+ which case a default function may be used if the architecture -+ does not need to provide a specific implementation. -+ -+!Iinclude/linux/kgdb.h -+ -+ -+ kgdboc internals -+ -+ The kgdboc driver is actually a very thin driver that relies on the -+ underlying low level to the hardware driver having "polling hooks" -+ to which the tty driver is attached. In the initial -+ implementation of kgdboc the serial_core was changed to expose a -+ low level uart hook for doing polled mode reading and writing of a -+ single character while in an atomic context. When kgdb makes an I/O -+ request to the debugger, kgdboc invokes a call back in the serial -+ core which in turn uses the call back in the uart driver. It is -+ certainly possible to extend kgdboc to work with non-uart based -+ consoles in the future. -+ -+ -+ When using kgdboc with a uart, the uart driver must implement two callbacks in the struct uart_ops. Example from drivers/8250.c: -+#ifdef CONFIG_CONSOLE_POLL -+ .poll_get_char = serial8250_get_poll_char, -+ .poll_put_char = serial8250_put_poll_char, -+#endif -+ -+ Any implementation specifics around creating a polling driver use the -+ #ifdef CONFIG_CONSOLE_POLL, as shown above. -+ Keep in mind that polling hooks have to be implemented in such a way -+ that they can be called from an atomic context and have to restore -+ the state of the uart chip on return such that the system can return -+ to normal when the debugger detaches.
You need to be very careful -+ with any kind of lock you consider, because failing here is most -+ likely going to mean pressing the reset button. -+ -+ -+ -+ -+ Credits -+ -+ The following people have contributed to this document: -+ -+ Amit Kale <amitkale@linsyssoft.com> -+ Tom Rini <trini@kernel.crashing.org> -+ -+ In March 2008 this document was completely rewritten by: -+ -+ Jason Wessel <jason.wessel@windriver.com> -+ -+ -+ -+
-+ -diff --git a/Documentation/DocBook/mac80211.tmpl b/Documentation/DocBook/mac80211.tmpl -new file mode 100644 -index 0000000..b651e0a ---- /dev/null -+++ b/Documentation/DocBook/mac80211.tmpl -@@ -0,0 +1,335 @@ -+ -+ -+ -+ -+ -+ The mac80211 subsystem for kernel developers -+ -+ -+ -+ Johannes -+ Berg -+ -+
johannes@sipsolutions.net
-+
-+
-+
-+ -+ -+ 2007 -+ 2008 -+ Johannes Berg -+ -+ -+ -+ -+ This documentation is free software; you can redistribute -+ it and/or modify it under the terms of the GNU General Public -+ License version 2 as published by the Free Software Foundation. -+ -+ -+ -+ This documentation is distributed in the hope that it will be -+ useful, but WITHOUT ANY WARRANTY; without even the implied -+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -+ See the GNU General Public License for more details. -+ -+ -+ -+ You should have received a copy of the GNU General Public -+ License along with this documentation; if not, write to the Free -+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, -+ MA 02111-1307 USA -+ -+ -+ -+ For more details see the file COPYING in the source -+ distribution of Linux. -+ -+ -+ -+ -+!Pinclude/net/mac80211.h Introduction -+!Pinclude/net/mac80211.h Warning -+ -+
-+ -+ -+ -+ -+ -+ -+ The basic mac80211 driver interface -+ -+ -+ You should read and understand the information contained -+ within this part of the book while implementing a driver. -+ In some chapters, advanced usage is noted, that may be -+ skipped at first. -+ -+ -+ This part of the book only covers station and monitor mode -+ functionality, additional information required to implement -+ the other modes is covered in the second part of the book. -+ -+ -+ -+ -+ Basic hardware handling -+ TBD -+ -+ This chapter shall contain information on getting a hw -+ struct allocated and registered with mac80211. -+ -+ -+ Since it is required to allocate rates/modes before registering -+ a hw struct, this chapter shall also contain information on setting -+ up the rate/mode structs. -+ -+ -+ Additionally, some discussion about the callbacks and -+ the general programming model should be in here, including -+ the definition of ieee80211_ops which will be referred to -+ a lot. -+ -+ -+ Finally, a discussion of hardware capabilities should be done -+ with references to other parts of the book. -+ -+ -+!Finclude/net/mac80211.h ieee80211_hw -+!Finclude/net/mac80211.h ieee80211_hw_flags -+!Finclude/net/mac80211.h SET_IEEE80211_DEV -+!Finclude/net/mac80211.h SET_IEEE80211_PERM_ADDR -+!Finclude/net/mac80211.h ieee80211_ops -+!Finclude/net/mac80211.h ieee80211_alloc_hw -+!Finclude/net/mac80211.h ieee80211_register_hw -+!Finclude/net/mac80211.h ieee80211_get_tx_led_name -+!Finclude/net/mac80211.h ieee80211_get_rx_led_name -+!Finclude/net/mac80211.h ieee80211_get_assoc_led_name -+!Finclude/net/mac80211.h ieee80211_get_radio_led_name -+!Finclude/net/mac80211.h ieee80211_unregister_hw -+!Finclude/net/mac80211.h ieee80211_free_hw -+ -+ -+ -+ PHY configuration -+ TBD -+ -+ This chapter should describe PHY handling including -+ start/stop callbacks and the various structures used. 
-+ -+!Finclude/net/mac80211.h ieee80211_conf -+!Finclude/net/mac80211.h ieee80211_conf_flags -+ -+ -+ -+ Virtual interfaces -+ TBD -+ -+ This chapter should describe virtual interface basics -+ that are relevant to the driver (VLANs, MGMT etc are not.) -+ It should explain the use of the add_iface/remove_iface -+ callbacks as well as the interface configuration callbacks. -+ -+ Things related to AP mode should be discussed there. -+ -+ Things related to supporting multiple interfaces should be -+ in the appropriate chapter, a BIG FAT note should be here about -+ this though and the recommendation to allow only a single -+ interface in STA mode at first! -+ -+!Finclude/net/mac80211.h ieee80211_if_types -+!Finclude/net/mac80211.h ieee80211_if_init_conf -+!Finclude/net/mac80211.h ieee80211_if_conf -+ -+ -+ -+ Receive and transmit processing -+ -+ what should be here -+ TBD -+ -+ This should describe the receive and transmit -+ paths in mac80211/the drivers as well as -+ transmit status handling. 
-+ -+ -+ -+ Frame format -+!Pinclude/net/mac80211.h Frame format -+ -+ -+ Alignment issues -+ TBD -+ -+ -+ Calling into mac80211 from interrupts -+!Pinclude/net/mac80211.h Calling mac80211 from interrupts -+ -+ -+ functions/definitions -+!Finclude/net/mac80211.h ieee80211_rx_status -+!Finclude/net/mac80211.h mac80211_rx_flags -+!Finclude/net/mac80211.h ieee80211_tx_control -+!Finclude/net/mac80211.h ieee80211_tx_status_flags -+!Finclude/net/mac80211.h ieee80211_rx -+!Finclude/net/mac80211.h ieee80211_rx_irqsafe -+!Finclude/net/mac80211.h ieee80211_tx_status -+!Finclude/net/mac80211.h ieee80211_tx_status_irqsafe -+!Finclude/net/mac80211.h ieee80211_rts_get -+!Finclude/net/mac80211.h ieee80211_rts_duration -+!Finclude/net/mac80211.h ieee80211_ctstoself_get -+!Finclude/net/mac80211.h ieee80211_ctstoself_duration -+!Finclude/net/mac80211.h ieee80211_generic_frame_duration -+!Finclude/net/mac80211.h ieee80211_get_hdrlen_from_skb -+!Finclude/net/mac80211.h ieee80211_get_hdrlen -+!Finclude/net/mac80211.h ieee80211_wake_queue -+!Finclude/net/mac80211.h ieee80211_stop_queue -+!Finclude/net/mac80211.h ieee80211_start_queues -+!Finclude/net/mac80211.h ieee80211_stop_queues -+!Finclude/net/mac80211.h ieee80211_wake_queues -+ -+ -+ -+ -+ Frame filtering -+!Pinclude/net/mac80211.h Frame filtering -+!Finclude/net/mac80211.h ieee80211_filter_flags -+ -+ -+ -+ -+ Advanced driver interface -+ -+ -+ Information contained within this part of the book is -+ of interest only for advanced interaction of mac80211 -+ with drivers to exploit more hardware capabilities and -+ improve performance. 
-+ -+ -+ -+ -+ Hardware crypto acceleration -+!Pinclude/net/mac80211.h Hardware crypto acceleration -+ -+!Finclude/net/mac80211.h set_key_cmd -+!Finclude/net/mac80211.h ieee80211_key_conf -+!Finclude/net/mac80211.h ieee80211_key_alg -+!Finclude/net/mac80211.h ieee80211_key_flags -+ -+ -+ -+ Multiple queues and QoS support -+ TBD -+!Finclude/net/mac80211.h ieee80211_tx_queue_params -+!Finclude/net/mac80211.h ieee80211_tx_queue_stats_data -+!Finclude/net/mac80211.h ieee80211_tx_queue -+ -+ -+ -+ Access point mode support -+ TBD -+ Some parts of the if_conf should be discussed here instead -+ -+ Insert notes about VLAN interfaces with hw crypto here or -+ in the hw crypto chapter. -+ -+!Finclude/net/mac80211.h ieee80211_get_buffered_bc -+!Finclude/net/mac80211.h ieee80211_beacon_get -+ -+ -+ -+ Supporting multiple virtual interfaces -+ TBD -+ -+ Note: WDS with identical MAC address should almost always be OK -+ -+ -+ Insert notes about having multiple virtual interfaces with -+ different MAC addresses here, note which configurations are -+ supported by mac80211, add notes about supporting hw crypto -+ with it. -+ -+ -+ -+ -+ Hardware scan offload -+ TBD -+!Finclude/net/mac80211.h ieee80211_scan_completed -+ -+ -+ -+ -+ Rate control interface -+ -+ TBD -+ -+ This part of the book describes the rate control algorithm -+ interface and how it relates to mac80211 and drivers. -+ -+ -+ -+ dummy chapter -+ TBD -+ -+ -+ -+ -+ Internals -+ -+ TBD -+ -+ This part of the book describes mac80211 internals. 
-+ -+ -+ -+ -+ Key handling -+ -+ Key handling basics -+!Pnet/mac80211/key.c Key handling basics -+ -+ -+ MORE TBD -+ TBD -+ -+ -+ -+ -+ Receive processing -+ TBD -+ -+ -+ -+ Transmit processing -+ TBD -+ -+ -+ -+ Station info handling -+ -+ Programming information -+!Fnet/mac80211/sta_info.h sta_info -+!Fnet/mac80211/sta_info.h ieee80211_sta_info_flags -+ -+ -+ STA information lifetime rules -+!Pnet/mac80211/sta_info.c STA information lifetime rules -+ -+ -+ -+ -+ Synchronisation -+ TBD -+ Locking, lots of RCU -+ -+ -+
-diff --git a/Documentation/DocBook/rapidio.tmpl b/Documentation/DocBook/rapidio.tmpl -index b9e143e..54eb26b 100644 ---- a/Documentation/DocBook/rapidio.tmpl -+++ b/Documentation/DocBook/rapidio.tmpl -@@ -133,7 +133,6 @@ - !Idrivers/rapidio/rio-sysfs.c - - PPC32 support --!Iarch/powerpc/kernel/rio.c - !Earch/powerpc/sysdev/fsl_rio.c - !Iarch/powerpc/sysdev/fsl_rio.c - -diff --git a/Documentation/DocBook/writing_usb_driver.tmpl b/Documentation/DocBook/writing_usb_driver.tmpl -index d4188d4..eeff19c 100644 ---- a/Documentation/DocBook/writing_usb_driver.tmpl -+++ b/Documentation/DocBook/writing_usb_driver.tmpl -@@ -100,8 +100,8 @@ - useful documents, at the USB home page (see Resources). An excellent - introduction to the Linux USB subsystem can be found at the USB Working - Devices List (see Resources). It explains how the Linux USB subsystem is -- structured and introduces the reader to the concept of USB urbs, which -- are essential to USB drivers. -+ structured and introduces the reader to the concept of USB urbs -+ (USB Request Blocks), which are essential to USB drivers. -
- - The first thing a Linux USB driver needs to do is register itself with -@@ -162,8 +162,8 @@ static int __init usb_skel_init(void) - module_init(usb_skel_init); -
- -- When the driver is unloaded from the system, it needs to unregister -- itself with the USB subsystem. This is done with the usb_unregister -+ When the driver is unloaded from the system, it needs to deregister -+ itself with the USB subsystem. This is done with the usb_deregister - function: - - -@@ -232,7 +232,7 @@ static int skel_probe(struct usb_interface *interface, - were passed to the USB subsystem will be called from a user program trying - to talk to the device. The first function called will be open, as the - program tries to open the device for I/O. We increment our private usage -- count and save off a pointer to our internal structure in the file -+ count and save a pointer to our internal structure in the file - structure. This is done so that future calls to file operations will - enable the driver to determine which device the user is addressing. All - of this is done with the following code: -@@ -252,8 +252,8 @@ file->private_data = dev; - send to the device based on the size of the write urb it has created (this - size depends on the size of the bulk out end point that the device has). - Then it copies the data from user space to kernel space, points the urb to -- the data and submits the urb to the USB subsystem. This can be shown in -- he following code: -+ the data and submits the urb to the USB subsystem. This can be seen in -+ the following code: - - - /* we can only write as much as 1 urb will hold */ -diff --git a/Documentation/HOWTO b/Documentation/HOWTO -index 5483561..0291ade 100644 ---- a/Documentation/HOWTO -+++ b/Documentation/HOWTO -@@ -249,9 +249,11 @@ process is as follows: - release a new -rc kernel every week. - - Process continues until the kernel is considered "ready", the - process should last around 6 weeks. 
-- - A list of known regressions present in each -rc release is -- tracked at the following URI: -- http://kernelnewbies.org/known_regressions -+ - Known regressions in each release are periodically posted to the -+ linux-kernel mailing list. The goal is to reduce the length of -+ that list to zero before declaring the kernel to be "ready," but, in -+ the real world, a small number of regressions often remain at -+ release time. - - It is worth mentioning what Andrew Morton wrote on the linux-kernel - mailing list about kernel releases: -@@ -261,7 +263,7 @@ mailing list about kernel releases: - - 2.6.x.y -stable kernel tree - --------------------------- --Kernels with 4 digit versions are -stable kernels. They contain -+Kernels with 4-part versions are -stable kernels. They contain - relatively small and critical fixes for security problems or significant - regressions discovered in a given 2.6.x kernel. - -@@ -273,7 +275,10 @@ If no 2.6.x.y kernel is available, then the highest numbered 2.6.x - kernel is the current stable kernel. - - 2.6.x.y are maintained by the "stable" team , and are --released almost every other week. -+released as needs dictate. The normal release period is approximately -+two weeks, but it can be longer if there are no pressing problems. A -+security-related problem, instead, can cause a release to happen almost -+instantly. - - The file Documentation/stable_kernel_rules.txt in the kernel tree - documents what kinds of changes are acceptable for the -stable tree, and -@@ -298,7 +303,9 @@ a while Andrew or the subsystem maintainer pushes it on to Linus for - inclusion in mainline. - - It is heavily encouraged that all new patches get tested in the -mm tree --before they are sent to Linus for inclusion in the main kernel tree. -+before they are sent to Linus for inclusion in the main kernel tree. Code -+which does not make an appearance in -mm before the opening of the merge -+window will prove hard to merge into the mainline. 
- - These kernels are not appropriate for use on systems that are supposed - to be stable and they are more risky to run than any of the other -@@ -354,11 +361,12 @@ Here is a list of some of the different kernel trees available: - - SCSI, James Bottomley - git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git - -+ - x86, Ingo Molnar -+ git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git -+ - quilt trees: -- - USB, PCI, Driver Core, and I2C, Greg Kroah-Hartman -+ - USB, Driver Core, and I2C, Greg Kroah-Hartman - kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/ -- - x86-64, partly i386, Andi Kleen -- ftp.firstfloor.org:/pub/ak/x86_64/quilt/ - - Other kernel trees can be found listed at http://git.kernel.org/ and in - the MAINTAINERS file. -@@ -392,8 +400,8 @@ If you want to be advised of the future bug reports, you can subscribe to the - bugme-new mailing list (only new bug reports are mailed here) or to the - bugme-janitor mailing list (every change in the bugzilla is mailed here) - -- http://lists.osdl.org/mailman/listinfo/bugme-new -- http://lists.osdl.org/mailman/listinfo/bugme-janitors -+ http://lists.linux-foundation.org/mailman/listinfo/bugme-new -+ http://lists.linux-foundation.org/mailman/listinfo/bugme-janitors - - - -diff --git a/Documentation/PCI/00-INDEX b/Documentation/PCI/00-INDEX -new file mode 100644 -index 0000000..49f4394 ---- /dev/null -+++ b/Documentation/PCI/00-INDEX -@@ -0,0 +1,12 @@ -+00-INDEX -+ - this file -+PCI-DMA-mapping.txt -+ - info for PCI drivers using DMA portably across all platforms -+PCIEBUS-HOWTO.txt -+ - a guide describing the PCI Express Port Bus driver -+pci-error-recovery.txt -+ - info on PCI error recovery -+pci.txt -+ - info on the PCI subsystem for device driver authors -+pcieaer-howto.txt -+ - the PCI Express Advanced Error Reporting Driver Guide HOWTO -diff --git a/Documentation/PCI/PCIEBUS-HOWTO.txt b/Documentation/PCI/PCIEBUS-HOWTO.txt -new file mode 100644 -index 0000000..9a07e38 ---- 
/dev/null -+++ b/Documentation/PCI/PCIEBUS-HOWTO.txt -@@ -0,0 +1,217 @@ -+ The PCI Express Port Bus Driver Guide HOWTO -+ Tom L Nguyen tom.l.nguyen@intel.com -+ 11/03/2004 -+ -+1. About this guide -+ -+This guide describes the basics of the PCI Express Port Bus driver -+and provides information on how to enable the service drivers to -+register/unregister with the PCI Express Port Bus Driver. -+ -+2. Copyright 2004 Intel Corporation -+ -+3. What is the PCI Express Port Bus Driver -+ -+A PCI Express Port is a logical PCI-PCI Bridge structure. There -+are two types of PCI Express Port: the Root Port and the Switch -+Port. The Root Port originates a PCI Express link from a PCI Express -+Root Complex and the Switch Port connects PCI Express links to -+internal logical PCI buses. The Switch Port, which has its secondary -+bus representing the switch's internal routing logic, is called the -+switch's Upstream Port. The switch's Downstream Port is bridging from -+switch's internal routing bus to a bus representing the downstream -+PCI Express link from the PCI Express Switch. -+ -+A PCI Express Port can provide up to four distinct functions, -+referred to in this document as services, depending on its port type. -+PCI Express Port's services include native hotplug support (HP), -+power management event support (PME), advanced error reporting -+support (AER), and virtual channel support (VC). These services may -+be handled by a single complex driver or be individually distributed -+and handled by corresponding service drivers. -+ -+4. Why use the PCI Express Port Bus Driver? -+ -+In existing Linux kernels, the Linux Device Driver Model allows a -+physical device to be handled by only a single driver. The PCI -+Express Port is a PCI-PCI Bridge device with multiple distinct -+services. To maintain a clean and simple solution each service -+may have its own software service driver. In this case several -+service drivers will compete for a single PCI-PCI Bridge device. 
-+For example, if the PCI Express Root Port native hotplug service -+driver is loaded first, it claims a PCI-PCI Bridge Root Port. The -+kernel therefore does not load other service drivers for that Root -+Port. In other words, it is impossible to have multiple service -+drivers load and run on a PCI-PCI Bridge device simultaneously -+using the current driver model. -+ -+To enable multiple service drivers running simultaneously requires -+having a PCI Express Port Bus driver, which manages all populated -+PCI Express Ports and distributes all provided service requests -+to the corresponding service drivers as required. Some key -+advantages of using the PCI Express Port Bus driver are listed below: -+ -+ - Allow multiple service drivers to run simultaneously on -+ a PCI-PCI Bridge Port device. -+ -+ - Allow service drivers implemented in an independent -+ staged approach. -+ -+ - Allow one service driver to run on multiple PCI-PCI Bridge -+ Port devices. -+ -+ - Manage and distribute resources of a PCI-PCI Bridge Port -+ device to requested service drivers. -+ -+5. Configuring the PCI Express Port Bus Driver vs. Service Drivers -+ -+5.1 Including the PCI Express Port Bus Driver Support into the Kernel -+ -+Including the PCI Express Port Bus driver depends on whether the PCI -+Express support is included in the kernel config. The kernel will -+automatically include the PCI Express Port Bus driver as a kernel -+driver when the PCI Express support is enabled in the kernel. -+ -+5.2 Enabling Service Driver Support -+ -+PCI device drivers are implemented based on Linux Device Driver Model. -+All service drivers are PCI device drivers. As discussed above, it is -+impossible to load any service driver once the kernel has loaded the -+PCI Express Port Bus Driver. To meet the PCI Express Port Bus Driver -+Model requires some minimal changes on existing service drivers that -+imposes no impact on the functionality of existing service drivers. 
-+ -+A service driver is required to use the two APIs shown below to -+register its service with the PCI Express Port Bus driver (see -+section 5.2.1 & 5.2.2). It is important that a service driver -+initializes the pcie_port_service_driver data structure, included in -+header file /include/linux/pcieport_if.h, before calling these APIs. -+Failure to do so will result in an identity mismatch, which prevents -+the PCI Express Port Bus driver from loading a service driver. -+ -+5.2.1 pcie_port_service_register -+ -+int pcie_port_service_register(struct pcie_port_service_driver *new) -+ -+This API replaces the Linux Driver Model's pci_module_init API. A -+service driver should always call pcie_port_service_register at -+module init. Note that after the service driver is loaded, calls -+such as pci_enable_device(dev) and pci_set_master(dev) are no longer -+necessary since these calls are executed by the PCI Port Bus driver. -+ -+5.2.2 pcie_port_service_unregister -+ -+void pcie_port_service_unregister(struct pcie_port_service_driver *new) -+ -+pcie_port_service_unregister replaces the Linux Driver Model's -+pci_unregister_driver. It's always called by service driver when a -+module exits. -+ -+5.2.3 Sample Code -+ -+Below is sample service driver code to initialize the port service -+driver data structure. -+ -+static struct pcie_port_service_id service_id[] = { { -+ .vendor = PCI_ANY_ID, -+ .device = PCI_ANY_ID, -+ .port_type = PCIE_RC_PORT, -+ .service_type = PCIE_PORT_SERVICE_AER, -+ }, { /* end: all zeroes */ } -+}; -+ -+static struct pcie_port_service_driver root_aerdrv = { -+ .name = (char *)device_name, -+ .id_table = &service_id[0], -+ -+ .probe = aerdrv_load, -+ .remove = aerdrv_unload, -+ -+ .suspend = aerdrv_suspend, -+ .resume = aerdrv_resume, -+}; -+ -+Below is sample code for registering/unregistering a service -+driver.
-+ -+static int __init aerdrv_service_init(void) -+{ -+ int retval = 0; -+ -+ retval = pcie_port_service_register(&root_aerdrv); -+ if (!retval) { -+ /* -+ * FIX ME -+ */ -+ } -+ return retval; -+} -+ -+static void __exit aerdrv_service_exit(void) -+{ -+ pcie_port_service_unregister(&root_aerdrv); -+} -+ -+module_init(aerdrv_service_init); -+module_exit(aerdrv_service_exit); -+ -+6. Possible Resource Conflicts -+ -+Since all service drivers of a PCI-PCI Bridge Port device are -+allowed to run simultaneously, listed below are a few possible resource -+conflicts with proposed solutions. -+ -+6.1 MSI Vector Resource -+ -+The MSI capability structure enables a device software driver to call -+pci_enable_msi to request MSI based interrupts. Once MSI interrupts -+are enabled on a device, it stays in this mode until a device driver -+calls pci_disable_msi to disable MSI interrupts and revert back to -+INTx emulation mode. Since service drivers of the same PCI-PCI Bridge -+port share the same physical device, if an individual service driver -+calls pci_enable_msi/pci_disable_msi it may result in unpredictable -+behavior. For example, two service drivers run simultaneously on the -+same physical Root Port. Both service drivers call pci_enable_msi to -+request MSI based interrupts. A service driver may not know whether -+any other service drivers have run on this Root Port. If either one -+of them calls pci_disable_msi, it puts the other service driver -+in a wrong interrupt mode. -+ -+To avoid this situation, service drivers are not permitted to -+switch interrupt mode on their device. The PCI Express Port Bus driver -+is responsible for determining the interrupt mode and this should be -+transparent to service drivers. Service drivers need to know only -+the vector IRQ assigned to the field irq of struct pcie_device, which -+is passed in when the PCI Express Port Bus driver probes each service -+driver.
Service drivers should use (struct pcie_device*)dev->irq to -+call request_irq/free_irq. In addition, the interrupt mode is stored -+in the field interrupt_mode of struct pcie_device. -+ -+6.2 MSI-X Vector Resources -+ -+Similar to the MSI case, a device driver for an MSI-X capable device can -+call pci_enable_msix to request MSI-X interrupts. Service drivers -+are not permitted to switch interrupt mode on their device. The PCI -+Express Port Bus driver is responsible for determining the interrupt -+mode and this should be transparent to service drivers. Any attempt -+by a service driver to call pci_enable_msix/pci_disable_msix may -+result in unpredictable behavior. Service drivers should use -+(struct pcie_device*)dev->irq and call request_irq/free_irq. -+ -+6.3 PCI Memory/IO Mapped Regions -+ -+Service drivers for PCI Express Power Management (PME), Advanced -+Error Reporting (AER), Hot-Plug (HP) and Virtual Channel (VC) access -+PCI configuration space on the PCI Express port. In all cases the -+registers accessed are independent of each other. This patch assumes -+that all service drivers will be well behaved and not overwrite -+other service drivers' configuration settings. -+ -+6.4 PCI Config Registers -+ -+Each service driver runs its PCI config operations on its own -+capability structure except the PCI Express capability structure, in -+which Root Control register and Device Control register are shared -+between PME and AER. This patch assumes that all service drivers -+will be well behaved and not overwrite other service drivers' -+configuration settings.
-diff --git a/Documentation/PCI/pci-error-recovery.txt b/Documentation/PCI/pci-error-recovery.txt -new file mode 100644 -index 0000000..6650af4 ---- /dev/null -+++ b/Documentation/PCI/pci-error-recovery.txt -@@ -0,0 +1,396 @@ -+ -+ PCI Error Recovery -+ ------------------ -+ February 2, 2006 -+ -+ Current document maintainer: -+ Linas Vepstas -+ -+ -+Many PCI bus controllers are able to detect a variety of hardware -+PCI errors on the bus, such as parity errors on the data and address -+busses, as well as SERR and PERR errors. Some of the more advanced -+chipsets are able to deal with these errors; these include PCI-E chipsets, -+and the PCI-host bridges found on IBM Power4 and Power5-based pSeries -+boxes. A typical action taken is to disconnect the affected device, -+halting all I/O to it. The goal of a disconnection is to avoid system -+corruption; for example, to halt system memory corruption due to DMA's -+to "wild" addresses. Typically, a reconnection mechanism is also -+offered, so that the affected PCI device(s) are reset and put back -+into working condition. The reset phase requires coordination -+between the affected device drivers and the PCI controller chip. -+This document describes a generic API for notifying device drivers -+of a bus disconnection, and then performing error recovery. -+This API is currently implemented in the 2.6.16 and later kernels. -+ -+Reporting and recovery is performed in several steps. First, when -+a PCI hardware error has resulted in a bus disconnect, that event -+is reported as soon as possible to all affected device drivers, -+including multiple instances of a device driver on multi-function -+cards. This allows device drivers to avoid deadlocking in spinloops, -+waiting for some i/o-space register to change, when it never will. -+It also gives the drivers a chance to defer incoming I/O as -+needed. -+ -+Next, recovery is performed in several stages. 
Most of the complexity -+is forced by the need to handle multi-function devices, that is, -+devices that have multiple device drivers associated with them. -+In the first stage, each driver is allowed to indicate what type -+of reset it desires, the choices being a simple re-enabling of I/O -+or requesting a hard reset (a full electrical #RST of the PCI card). -+If any driver requests a full reset, that is what will be done. -+ -+After a full reset and/or a re-enabling of I/O, all drivers are -+again notified, so that they may then perform any device setup/config -+that may be required. After these have all completed, a final -+"resume normal operations" event is sent out. -+ -+The biggest reason for choosing a kernel-based implementation rather -+than a user-space implementation was the need to deal with bus -+disconnects of PCI devices attached to storage media, and, in particular, -+disconnects from devices holding the root file system. If the root -+file system is disconnected, a user-space mechanism would have to go -+through a large number of contortions to complete recovery. Almost all -+of the current Linux file systems are not tolerant of disconnection -+from/reconnection to their underlying block device. By contrast, -+bus errors are easy to manage in the device driver. Indeed, most -+device drivers already handle very similar recovery procedures; -+for example, the SCSI-generic layer already provides significant -+mechanisms for dealing with SCSI bus errors and SCSI bus resets. -+ -+ -+Detailed Design -+--------------- -+Design and implementation details below, based on a chain of -+public email discussions with Ben Herrenschmidt, circa 5 April 2005. -+ -+The error recovery API support is exposed to the driver in the form of -+a structure of function pointers pointed to by a new field in struct -+pci_driver. A driver that fails to provide the structure is "non-aware", -+and the actual recovery steps taken are platform dependent. 
The -+arch/powerpc implementation will simulate a PCI hotplug remove/add. -+ -+This structure has the form: -+struct pci_error_handlers -+{ -+ int (*error_detected)(struct pci_dev *dev, enum pci_channel_state); -+ int (*mmio_enabled)(struct pci_dev *dev); -+ int (*link_reset)(struct pci_dev *dev); -+ int (*slot_reset)(struct pci_dev *dev); -+ void (*resume)(struct pci_dev *dev); -+}; -+ -+The possible channel states are: -+enum pci_channel_state { -+ pci_channel_io_normal, /* I/O channel is in normal state */ -+ pci_channel_io_frozen, /* I/O to channel is blocked */ -+ pci_channel_io_perm_failure, /* PCI card is dead */ -+}; -+ -+Possible return values are: -+enum pci_ers_result { -+ PCI_ERS_RESULT_NONE, /* no result/none/not supported in device driver */ -+ PCI_ERS_RESULT_CAN_RECOVER, /* Device driver can recover without slot reset */ -+ PCI_ERS_RESULT_NEED_RESET, /* Device driver wants slot to be reset. */ -+ PCI_ERS_RESULT_DISCONNECT, /* Device has completely failed, is unrecoverable */ -+ PCI_ERS_RESULT_RECOVERED, /* Device driver is fully recovered and operational */ -+}; -+ -+A driver does not have to implement all of these callbacks; however, -+if it implements any, it must implement error_detected(). If a callback -+is not implemented, the corresponding feature is considered unsupported. -+For example, if mmio_enabled() and resume() aren't there, then it -+is assumed that the driver is not doing any direct recovery and requires -+a reset. If link_reset() is not implemented, the card is assumed to -+not care about link resets. Typically a driver will want to know about -+a slot_reset(). -+ -+The actual steps taken by a platform to recover from a PCI error -+event will be platform-dependent, but will follow the general -+sequence described below. -+ -+STEP 0: Error Event -+------------------- -+A PCI bus error is detected by the PCI hardware. On powerpc, the slot -+is isolated, in that all I/O is blocked: all reads return 0xffffffff, -+all writes are ignored.
-+ -+ -+STEP 1: Notification -+-------------------- -+Platform calls the error_detected() callback on every instance of -+every driver affected by the error. -+ -+At this point, the device might not be accessible anymore, depending on -+the platform (the slot will be isolated on powerpc). The driver may -+already have "noticed" the error because of a failing I/O, but this -+is the proper "synchronization point", that is, it gives the driver -+a chance to clean up, waiting for pending stuff (timers, whatever, etc...) -+to complete; it can take semaphores, schedule, etc... everything but -+touch the device. Within this function and after it returns, the driver -+shouldn't do any new IOs. Called in task context. This is sort of a -+"quiesce" point. See note about interrupts at the end of this doc. -+ -+All drivers participating in this system must implement this call. -+The driver must return one of the following result codes: -+ - PCI_ERS_RESULT_CAN_RECOVER: -+ Driver returns this if it thinks it might be able to recover -+ the HW by just banging IOs or if it wants to be given -+ a chance to extract some diagnostic information (see -+ mmio_enabled(), below). -+ - PCI_ERS_RESULT_NEED_RESET: -+ Driver returns this if it can't recover without a hard -+ slot reset. -+ - PCI_ERS_RESULT_DISCONNECT: -+ Driver returns this if it doesn't want to recover at all. -+ -+The next step taken will depend on the result codes returned by the -+drivers. -+ -+If all drivers on the segment/slot return PCI_ERS_RESULT_CAN_RECOVER, -+then the platform should re-enable IOs on the slot (or do nothing in -+particular, if the platform doesn't isolate slots), and recovery -+proceeds to STEP 2 (MMIO Enabled). -+ -+If any driver requested a slot reset (by returning PCI_ERS_RESULT_NEED_RESET), -+then recovery proceeds to STEP 4 (Slot Reset). -+ -+If the platform is unable to recover the slot, the next step -+is STEP 6 (Permanent Failure).
-+ -+>>> The current powerpc implementation assumes that a device driver will -+>>> *not* schedule or semaphore in this routine; the current powerpc -+>>> implementation uses one kernel thread to notify all devices; -+>>> thus, if one device sleeps/schedules, all devices are affected. -+>>> Doing better requires complex multi-threaded logic in the error -+>>> recovery implementation (e.g. waiting for all notification threads -+>>> to "join" before proceeding with recovery.) This seems excessively -+>>> complex and not worth implementing. -+ -+>>> The current powerpc implementation doesn't much care if the device -+>>> attempts I/O at this point, or not. I/O's will fail, returning -+>>> a value of 0xff on read, and writes will be dropped. If the device -+>>> driver attempts more than 10K I/O's to a frozen adapter, it will -+>>> assume that the device driver has gone into an infinite loop, and -+>>> it will panic the kernel. There doesn't seem to be any other -+>>> way of stopping a device driver that insists on spinning on I/O. -+ -+STEP 2: MMIO Enabled -+------------------- -+The platform re-enables MMIO to the device (but typically not the -+DMA), and then calls the mmio_enabled() callback on all affected -+device drivers. -+ -+This is the "early recovery" call. IOs are allowed again, but DMA is -+not (hrm... to be discussed, I prefer not), with some restrictions. This -+is NOT a callback for the driver to start operations again, only to -+peek/poke at the device, extract diagnostic information, if any, and -+eventually do things like trigger a device local reset or some such, -+but not restart operations. This callback is made if all drivers on -+a segment agree that they can try to recover and if no automatic link reset -+was performed by the HW.
If the platform can't just re-enable IOs without -+a slot reset or a link reset, it won't call this callback, and instead -+will have gone directly to STEP 3 (Link Reset) or STEP 4 (Slot Reset) -+ -+>>> The following is proposed; no platform implements this yet: -+>>> Proposal: All I/O's should be done _synchronously_ from within -+>>> this callback, errors triggered by them will be returned via -+>>> the normal pci_check_whatever() API, no new error_detected() -+>>> callback will be issued due to an error happening here. However, -+>>> such an error might cause IOs to be re-blocked for the whole -+>>> segment, and thus invalidate the recovery that other devices -+>>> on the same segment might have done, forcing the whole segment -+>>> into one of the next states, that is, link reset or slot reset. -+ -+The driver should return one of the following result codes: -+ - PCI_ERS_RESULT_RECOVERED -+ Driver returns this if it thinks the device is fully -+ functional and thinks it is ready to start -+ normal driver operations again. There is no -+ guarantee that the driver will actually be -+ allowed to proceed, as another driver on the -+ same segment might have failed and thus triggered a -+ slot reset on platforms that support it. -+ -+ - PCI_ERS_RESULT_NEED_RESET -+ Driver returns this if it thinks the device is not -+ recoverable in its current state and it needs a slot -+ reset to proceed. -+ -+ - PCI_ERS_RESULT_DISCONNECT -+ Same as above. Total failure, no recovery even after -+ reset driver dead. (To be defined more precisely) -+ -+The next step taken depends on the results returned by the drivers. -+If all drivers returned PCI_ERS_RESULT_RECOVERED, then the platform -+proceeds to either STEP 3 (Link Reset) or to STEP 5 (Resume Operations). -+ -+If any driver returned PCI_ERS_RESULT_NEED_RESET, then the platform -+proceeds to STEP 4 (Slot Reset) -+ -+>>> The current powerpc implementation does not implement this callback.
-+ -+ -+STEP 3: Link Reset -+------------------ -+The platform resets the link, and then calls the link_reset() callback -+on all affected device drivers. This is a PCI-Express specific state -+and is done whenever a non-fatal error has been detected that can be -+"solved" by resetting the link. This call informs the driver of the -+reset and the driver should check to see if the device appears to be -+in working condition. -+ -+The driver is not supposed to restart normal driver I/O operations -+at this point. It should limit itself to "probing" the device to -+check its recoverability status. If all is right, then the platform -+will call resume() once all drivers have ack'd link_reset(). -+ -+ Result codes: -+ (identical to STEP 2 (MMIO Enabled)) -+ -+The platform then proceeds to either STEP 4 (Slot Reset) or STEP 5 -+(Resume Operations). -+ -+>>> The current powerpc implementation does not implement this callback. -+ -+ -+STEP 4: Slot Reset -+------------------ -+The platform performs a soft or hard reset of the device, and then -+calls the slot_reset() callback. -+ -+A soft reset consists of asserting the adapter #RST line and then -+restoring the PCI BAR's and PCI configuration header to a state -+that is equivalent to what it would be after a fresh system -+power-on followed by power-on BIOS/system firmware initialization. -+If the platform supports PCI hotplug, then the reset might be -+performed by toggling the slot electrical power off/on. -+ -+It is important for the platform to restore the PCI config space -+to the "fresh poweron" state, rather than the "last state". After -+a slot reset, the device driver will almost always use its standard -+device initialization routines, and an unusual config space setup -+may result in hung devices, kernel panics, or silent data corruption. -+ -+This call gives drivers the chance to re-initialize the hardware -+(re-download firmware, etc.).
At this point, the driver may assume -+that the card is in a fresh state and is fully functional. In -+particular, interrupt generation should work normally. -+ -+Drivers should not yet restart normal I/O processing operations -+at this point. If all device drivers report success on this -+callback, the platform will call resume() to complete the sequence, -+and let the driver restart normal I/O processing. -+ -+A driver can still return a critical failure for this function if -+it can't get the device operational after reset. If the platform -+previously tried a soft reset, it might now try a hard reset (power -+cycle) and then call slot_reset() again. If the device still can't -+be recovered, there is nothing more that can be done; the platform -+will typically report a "permanent failure" in such a case. The -+device will be considered "dead" in this case. -+ -+Drivers for multi-function cards will need to coordinate among -+themselves as to which driver instance will perform any "one-shot" -+or global device initialization. For example, the Symbios sym53cxx2 -+driver performs device init only from PCI function 0: -+ -++ if (PCI_FUNC(pdev->devfn) == 0) -++ sym_reset_scsi_bus(np, 0); -+ -+ Result codes: -+ - PCI_ERS_RESULT_DISCONNECT -+ Same as above. -+ -+Platform proceeds either to STEP 5 (Resume Operations) or STEP 6 (Permanent -+Failure). -+ -+>>> The current powerpc implementation does not currently try a -+>>> power-cycle reset if the driver returned PCI_ERS_RESULT_DISCONNECT. -+>>> However, it probably should. -+ -+ -+STEP 5: Resume Operations -+------------------------- -+The platform will call the resume() callback on all affected device -+drivers if all drivers on the segment have returned -+PCI_ERS_RESULT_RECOVERED from one of the 3 previous callbacks. -+The goal of this callback is to tell the driver to restart activity, -+that everything is back and running. This callback does not return -+a result code.
-+ -+At this point, if a new error happens, the platform will restart -+a new error recovery sequence. -+ -+STEP 6: Permanent Failure -+------------------------- -+A "permanent failure" has occurred, and the platform cannot recover -+the device. The platform will call error_detected() with a -+pci_channel_state value of pci_channel_io_perm_failure. -+ -+The device driver should, at this point, assume the worst. It should -+cancel all pending I/O, refuse all new I/O, returning -EIO to -+higher layers. The device driver should then clean up all of its -+memory and remove itself from kernel operations, much as it would -+during system shutdown. -+ -+The platform will typically notify the system operator of the -+permanent failure in some way. If the device is hotplug-capable, -+the operator will probably want to remove and replace the device. -+Note, however, not all failures are truly "permanent". Some are -+caused by over-heating, some by a poorly seated card. Many -+PCI error events are caused by software bugs, e.g. DMA's to -+wild addresses or bogus split transactions due to programming -+errors. See the discussion in powerpc/eeh-pci-error-recovery.txt -+for additional detail on real-life experience of the causes of -+software errors. -+ -+ -+Conclusion; General Remarks -+--------------------------- -+The way those callbacks are called is platform policy. A platform with -+no slot reset capability may want to just "ignore" drivers that can't -+recover (disconnect them) and try to let other cards on the same segment -+recover. Keep in mind that in most real life cases, though, there will -+be only one driver per segment. -+ -+Now, a note about interrupts. If you get an interrupt and your -+device is dead or has been isolated, there is a problem :) -+The current policy is to turn this into a platform policy. 
-+That is, the recovery API only requires that: -+ -+ - There is no guarantee that interrupt delivery can proceed from any -+device on the segment starting from the error detection and until the -+resume callback is sent, at which point interrupts are expected to be -+fully operational. -+ -+ - There is no guarantee that interrupt delivery is stopped, that is, -+a driver that gets an interrupt after detecting an error, or that detects -+an error within the interrupt handler such that it prevents proper -+ack'ing of the interrupt (and thus removal of the source) should just -+return IRQ_NOTHANDLED. It's up to the platform to deal with that -+condition, typically by masking the IRQ source during the duration of -+the error handling. It is expected that the platform "knows" which -+interrupts are routed to error-management capable slots and can deal -+with temporarily disabling that IRQ number during error processing (this -+isn't terribly complex). That means some IRQ latency for other devices -+sharing the interrupt, but there is simply no other way. High end -+platforms aren't supposed to share interrupts between many devices -+anyway :) -+ -+>>> Implementation details for the powerpc platform are discussed in -+>>> the file Documentation/powerpc/eeh-pci-error-recovery.txt -+ -+>>> As of this writing, there are six device drivers with patches -+>>> implementing error recovery. Not all of these patches are in -+>>> mainline yet. 
These may be used as "examples": -+>>> -+>>> drivers/scsi/ipr.c -+>>> drivers/scsi/sym53cxx_2 -+>>> drivers/net/e100.c -+>>> drivers/net/e1000 -+>>> drivers/net/ixgb -+>>> drivers/net/s2io.c -+ -+The End -+------- -diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt -new file mode 100644 -index 0000000..8d4dc62 ---- /dev/null -+++ b/Documentation/PCI/pci.txt -@@ -0,0 +1,646 @@ -+ -+ How To Write Linux PCI Drivers -+ -+ by Martin Mares on 07-Feb-2000 -+ updated by Grant Grundler on 23-Dec-2006 -+ -+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+The world of PCI is vast and full of (mostly unpleasant) surprises. -+Since each CPU architecture implements different chip-sets and PCI devices -+have different requirements (erm, "features"), the result is the PCI support -+in the Linux kernel is not as trivial as one would wish. This short paper -+tries to introduce all potential driver authors to Linux APIs for -+PCI device drivers. -+ -+A more complete resource is the third edition of "Linux Device Drivers" -+by Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman. -+LDD3 is available for free (under Creative Commons License) from: -+ -+ http://lwn.net/Kernel/LDD3/ -+ -+However, keep in mind that all documents are subject to "bit rot". -+Refer to the source code if things are not working as described here. -+ -+Please send questions/comments/patches about Linux PCI API to the -+"Linux PCI" mailing list. -+ -+ -+ -+0. Structure of PCI drivers -+~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+PCI drivers "discover" PCI devices in a system via pci_register_driver(). -+Actually, it's the other way around. When the PCI generic code discovers -+a new device, the driver with a matching "description" will be notified. -+Details on this below.
-+ -+pci_register_driver() leaves most of the probing for devices to -+the PCI layer and supports online insertion/removal of devices [thus -+supporting hot-pluggable PCI, CardBus, and Express-Card in a single driver]. -+pci_register_driver() call requires passing in a table of function -+pointers and thus dictates the high level structure of a driver. -+ -+Once the driver knows about a PCI device and takes ownership, the -+driver generally needs to perform the following initialization: -+ -+ Enable the device -+ Request MMIO/IOP resources -+ Set the DMA mask size (for both coherent and streaming DMA) -+ Allocate and initialize shared control data (pci_allocate_coherent()) -+ Access device configuration space (if needed) -+ Register IRQ handler (request_irq()) -+ Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip) -+ Enable DMA/processing engines -+ -+When done using the device, and perhaps the module needs to be unloaded, -+the driver needs to take the follow steps: -+ Disable the device from generating IRQs -+ Release the IRQ (free_irq()) -+ Stop all DMA activity -+ Release DMA buffers (both streaming and coherent) -+ Unregister from other subsystems (e.g. scsi or netdev) -+ Release MMIO/IOP resources -+ Disable the device -+ -+Most of these topics are covered in the following sections. -+For the rest look at LDD3 or . -+ -+If the PCI subsystem is not configured (CONFIG_PCI is not set), most of -+the PCI functions described below are defined as inline functions either -+completely empty or just returning an appropriate error codes to avoid -+lots of ifdefs in the drivers. -+ -+ -+ -+1. 
pci_register_driver() call -+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+ -+PCI device drivers call pci_register_driver() during their -+initialization with a pointer to a structure describing the driver -+(struct pci_driver): -+ -+ field name Description -+ ---------- ------------------------------------------------------ -+ id_table Pointer to table of device ID's the driver is -+ interested in. Most drivers should export this -+ table using MODULE_DEVICE_TABLE(pci,...). -+ -+ probe This probing function gets called (during execution -+ of pci_register_driver() for already existing -+ devices or later if a new device gets inserted) for -+ all PCI devices which match the ID table and are not -+ "owned" by the other drivers yet. This function gets -+ passed a "struct pci_dev *" for each device whose -+ entry in the ID table matches the device. The probe -+ function returns zero when the driver chooses to -+ take "ownership" of the device or an error code -+ (negative number) otherwise. -+ The probe function always gets called from process -+ context, so it can sleep. -+ -+ remove The remove() function gets called whenever a device -+ being handled by this driver is removed (either during -+ deregistration of the driver or when it's manually -+ pulled out of a hot-pluggable slot). -+ The remove function always gets called from process -+ context, so it can sleep. -+ -+ suspend Put device into low power state. -+ suspend_late Put device into low power state. -+ -+ resume_early Wake device from low power state. -+ resume Wake device from low power state. -+ -+ (Please see Documentation/power/pci.txt for descriptions -+ of PCI Power Management and the related functions.) -+ -+ shutdown Hook into reboot_notifier_list (kernel/sys.c). -+ Intended to stop any idling DMA operations. -+ Useful for enabling wake-on-lan (NIC) or changing -+ the power state of a device before reboot. -+ e.g. drivers/net/e100.c. 
-+ -+ err_handler See Documentation/PCI/pci-error-recovery.txt -+ -+ -+The ID table is an array of struct pci_device_id entries ending with an -+all-zero entry; use of the macro DEFINE_PCI_DEVICE_TABLE is the preferred -+method of declaring the table. Each entry consists of: -+ -+ vendor,device Vendor and device ID to match (or PCI_ANY_ID) -+ -+ subvendor, Subsystem vendor and device ID to match (or PCI_ANY_ID) -+ subdevice, -+ -+ class Device class, subclass, and "interface" to match. -+ See Appendix D of the PCI Local Bus Spec or -+ include/linux/pci_ids.h for a full list of classes. -+ Most drivers do not need to specify class/class_mask -+ as vendor/device is normally sufficient. -+ -+ class_mask limit which sub-fields of the class field are compared. -+ See drivers/scsi/sym53c8xx_2/ for example of usage. -+ -+ driver_data Data private to the driver. -+ Most drivers don't need to use driver_data field. -+ Best practice is to use driver_data as an index -+ into a static list of equivalent device types, -+ instead of using it as a pointer. -+ -+ -+Most drivers only need PCI_DEVICE() or PCI_DEVICE_CLASS() to set up -+a pci_device_id table. -+ -+New PCI IDs may be added to a device driver pci_ids table at runtime -+as shown below: -+ -+echo "vendor device subvendor subdevice class class_mask driver_data" > \ -+/sys/bus/pci/drivers/{driver}/new_id -+ -+All fields are passed in as hexadecimal values (no leading 0x). -+The vendor and device fields are mandatory, the others are optional. Users -+need pass only as many optional fields as necessary: -+ o subvendor and subdevice fields default to PCI_ANY_ID (FFFFFFFF) -+ o class and classmask fields default to 0 -+ o driver_data defaults to 0UL. -+ -+Once added, the driver probe routine will be invoked for any unclaimed -+PCI devices listed in its (newly updated) pci_ids list. 
-+ -+When the driver exits, it just calls pci_unregister_driver() and the PCI layer -+automatically calls the remove hook for all devices handled by the driver. -+ -+ -+1.1 "Attributes" for driver functions/data -+ -+Please mark the initialization and cleanup functions where appropriate -+(the corresponding macros are defined in ): -+ -+ __init Initialization code. Thrown away after the driver -+ initializes. -+ __exit Exit code. Ignored for non-modular drivers. -+ -+ -+ __devinit Device initialization code. -+ Identical to __init if the kernel is not compiled -+ with CONFIG_HOTPLUG, normal function otherwise. -+ __devexit The same for __exit. -+ -+Tips on when/where to use the above attributes: -+ o The module_init()/module_exit() functions (and all -+ initialization functions called _only_ from these) -+ should be marked __init/__exit. -+ -+ o Do not mark the struct pci_driver. -+ -+ o The ID table array should be marked __devinitconst; this is done -+ automatically if the table is declared with DEFINE_PCI_DEVICE_TABLE(). -+ -+ o The probe() and remove() functions should be marked __devinit -+ and __devexit respectively. All initialization functions -+ exclusively called by the probe() routine, can be marked __devinit. -+ Ditto for remove() and __devexit. -+ -+ o If mydriver_remove() is marked with __devexit(), then all address -+ references to mydriver_remove must use __devexit_p(mydriver_remove) -+ (in the struct pci_driver declaration for example). -+ __devexit_p() will generate the function name _or_ NULL if the -+ function will be discarded. For an example, see drivers/net/tg3.c. -+ -+ o Do NOT mark a function if you are not sure which mark to use. -+ Better to not mark the function than mark the function wrong. -+ -+ -+ -+2. How to find PCI devices manually -+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+ -+PCI drivers should have a really good reason for not using the -+pci_register_driver() interface to search for PCI devices. 
-+The main reason PCI devices are controlled by multiple drivers -+is because one PCI device implements several different HW services. -+E.g. combined serial/parallel port/floppy controller. -+ -+A manual search may be performed using the following constructs: -+ -+Searching by vendor and device ID: -+ -+ struct pci_dev *dev = NULL; -+ while (dev = pci_get_device(VENDOR_ID, DEVICE_ID, dev)) -+ configure_device(dev); -+ -+Searching by class ID (iterate in a similar way): -+ -+ pci_get_class(CLASS_ID, dev) -+ -+Searching by both vendor/device and subsystem vendor/device ID: -+ -+ pci_get_subsys(VENDOR_ID,DEVICE_ID, SUBSYS_VENDOR_ID, SUBSYS_DEVICE_ID, dev). -+ -+You can use the constant PCI_ANY_ID as a wildcard replacement for -+VENDOR_ID or DEVICE_ID. This allows searching for any device from a -+specific vendor, for example. -+ -+These functions are hotplug-safe. They increment the reference count on -+the pci_dev that they return. You must eventually (possibly at module unload) -+decrement the reference count on these devices by calling pci_dev_put(). -+ -+ -+ -+3. Device Initialization Steps -+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+ -+As noted in the introduction, most PCI drivers need the following steps -+for device initialization: -+ -+ Enable the device -+ Request MMIO/IOP resources -+ Set the DMA mask size (for both coherent and streaming DMA) -+ Allocate and initialize shared control data (pci_allocate_coherent()) -+ Access device configuration space (if needed) -+ Register IRQ handler (request_irq()) -+ Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip) -+ Enable DMA/processing engines. -+ -+The driver can access PCI config space registers at any time. -+(Well, almost. When running BIST, config space can go away...but -+that will just result in a PCI Bus Master Abort and config reads -+will return garbage). 
-+ -+ -+3.1 Enable the PCI device -+~~~~~~~~~~~~~~~~~~~~~~~~~ -+Before touching any device registers, the driver needs to enable -+the PCI device by calling pci_enable_device(). This will: -+ o wake up the device if it was in suspended state, -+ o allocate I/O and memory regions of the device (if BIOS did not), -+ o allocate an IRQ (if BIOS did not). -+ -+NOTE: pci_enable_device() can fail! Check the return value. -+ -+[ OS BUG: we don't check resource allocations before enabling those -+ resources. The sequence would make more sense if we called -+ pci_request_resources() before calling pci_enable_device(). -+ Currently, the device drivers can't detect the bug when two -+ devices have been allocated the same range. This is not a common -+ problem and unlikely to get fixed soon. -+ -+ This has been discussed before but not changed as of 2.6.19: -+ http://lkml.org/lkml/2006/3/2/194 -+] -+ -+pci_set_master() will enable DMA by setting the bus master bit -+in the PCI_COMMAND register. It also fixes the latency timer value if -+it's set to something bogus by the BIOS. -+ -+If the PCI device can use the PCI Memory-Write-Invalidate transaction, -+call pci_set_mwi(). This enables the PCI_COMMAND bit for Mem-Wr-Inval -+and also ensures that the cache line size register is set correctly. -+Check the return value of pci_set_mwi() as not all architectures -+or chip-sets may support Memory-Write-Invalidate. Alternatively, -+if Mem-Wr-Inval would be nice to have but is not required, call -+pci_try_set_mwi() to have the system do its best effort at enabling -+Mem-Wr-Inval. -+ -+ -+3.2 Request MMIO/IOP resources -+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+Memory (MMIO), and I/O port addresses should NOT be read directly -+from the PCI device config space. Use the values in the pci_dev structure -+as the PCI "bus address" might have been remapped to a "host physical" -+address by the arch/chip-set specific kernel support.
-+ -+See Documentation/IO-mapping.txt for how to access device registers -+or device memory. -+ -+The device driver needs to call pci_request_region() to verify -+no other device is already using the same address resource. -+Conversely, drivers should call pci_release_region() AFTER -+calling pci_disable_device(). -+The idea is to prevent two devices colliding on the same address range. -+ -+[ See OS BUG comment above. Currently (2.6.19), The driver can only -+ determine MMIO and IO Port resource availability _after_ calling -+ pci_enable_device(). ] -+ -+Generic flavors of pci_request_region() are request_mem_region() -+(for MMIO ranges) and request_region() (for IO Port ranges). -+Use these for address resources that are not described by "normal" PCI -+BARs. -+ -+Also see pci_request_selected_regions() below. -+ -+ -+3.3 Set the DMA mask size -+~~~~~~~~~~~~~~~~~~~~~~~~~ -+[ If anything below doesn't make sense, please refer to -+ Documentation/DMA-API.txt. This section is just a reminder that -+ drivers need to indicate DMA capabilities of the device and is not -+ an authoritative source for DMA interfaces. ] -+ -+While all drivers should explicitly indicate the DMA capability -+(e.g. 32 or 64 bit) of the PCI bus master, devices with more than -+32-bit bus master capability for streaming data need the driver -+to "register" this capability by calling pci_set_dma_mask() with -+appropriate parameters. In general this allows more efficient DMA -+on systems where System RAM exists above 4G _physical_ address. -+ -+Drivers for all PCI-X and PCIe compliant devices must call -+pci_set_dma_mask() as they are 64-bit DMA devices. -+ -+Similarly, drivers must also "register" this capability if the device -+can directly address "consistent memory" in System RAM above 4G physical -+address by calling pci_set_consistent_dma_mask(). -+Again, this includes drivers for all PCI-X and PCIe compliant devices. 
-+Many 64-bit "PCI" devices (before PCI-X) and some PCI-X devices are -+64-bit DMA capable for payload ("streaming") data but not control -+("consistent") data. -+ -+ -+3.4 Setup shared control data -+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+Once the DMA masks are set, the driver can allocate "consistent" (a.k.a. shared) -+memory. See Documentation/DMA-API.txt for a full description of -+the DMA APIs. This section is just a reminder that it needs to be done -+before enabling DMA on the device. -+ -+ -+3.5 Initialize device registers -+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+Some drivers will need specific "capability" fields programmed -+or other "vendor specific" register initialized or reset. -+E.g. clearing pending interrupts. -+ -+ -+3.6 Register IRQ handler -+~~~~~~~~~~~~~~~~~~~~~~~~ -+While calling request_irq() is the last step described here, -+this is often just another intermediate step to initialize a device. -+This step can often be deferred until the device is opened for use. -+ -+All interrupt handlers for IRQ lines should be registered with IRQF_SHARED -+and use the devid to map IRQs to devices (remember that all PCI IRQ lines -+can be shared). -+ -+request_irq() will associate an interrupt handler and device handle -+with an interrupt number. Historically interrupt numbers represent -+IRQ lines which run from the PCI device to the Interrupt controller. -+With MSI and MSI-X (more below) the interrupt number is a CPU "vector". -+ -+request_irq() also enables the interrupt. Make sure the device is -+quiesced and does not have any interrupts pending before registering -+the interrupt handler. -+ -+MSI and MSI-X are PCI capabilities. Both are "Message Signaled Interrupts" -+which deliver interrupts to the CPU via a DMA write to a Local APIC. -+The fundamental difference between MSI and MSI-X is how multiple -+"vectors" get allocated. MSI requires contiguous blocks of vectors -+while MSI-X can allocate several individual ones. 
-+ -+MSI capability can be enabled by calling pci_enable_msi() or -+pci_enable_msix() before calling request_irq(). This causes -+the PCI support to program CPU vector data into the PCI device -+capability registers. -+ -+If your PCI device supports both, try to enable MSI-X first. -+Only one can be enabled at a time. Many architectures, chip-sets, -+or BIOSes do NOT support MSI or MSI-X and the call to pci_enable_msi/msix -+will fail. This is important to note since many drivers have -+two (or more) interrupt handlers: one for MSI/MSI-X and another for IRQs. -+They choose which handler to register with request_irq() based on the -+return value from pci_enable_msi/msix(). -+ -+There are (at least) two really good reasons for using MSI: -+1) MSI is an exclusive interrupt vector by definition. -+ This means the interrupt handler doesn't have to verify -+ its device caused the interrupt. -+ -+2) MSI avoids DMA/IRQ race conditions. DMA to host memory is guaranteed -+ to be visible to the host CPU(s) when the MSI is delivered. This -+ is important for both data coherency and avoiding stale control data. -+ This guarantee allows the driver to omit MMIO reads to flush -+ the DMA stream. -+ -+See drivers/infiniband/hw/mthca/ or drivers/net/tg3.c for examples -+of MSI/MSI-X usage. -+ -+ -+ -+4. PCI device shutdown -+~~~~~~~~~~~~~~~~~~~~~~~ -+ -+When a PCI device driver is being unloaded, most of the following -+steps need to be performed: -+ -+ Disable the device from generating IRQs -+ Release the IRQ (free_irq()) -+ Stop all DMA activity -+ Release DMA buffers (both streaming and consistent) -+ Unregister from other subsystems (e.g. scsi or netdev) -+ Disable device from responding to MMIO/IO Port addresses -+ Release MMIO/IO Port resource(s) -+ -+ -+4.1 Stop IRQs on the device -+~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+How to do this is chip/device specific. 
If it's not done, it opens -+the possibility of a "screaming interrupt" if (and only if) -+the IRQ is shared with another device. -+ -+When the shared IRQ handler is "unhooked", the remaining devices -+using the same IRQ line will still need the IRQ enabled. Thus if the -+"unhooked" device asserts IRQ line, the system will respond assuming -+it was one of the remaining devices asserted the IRQ line. Since none -+of the other devices will handle the IRQ, the system will "hang" until -+it decides the IRQ isn't going to get handled and masks the IRQ (100,000 -+iterations later). Once the shared IRQ is masked, the remaining devices -+will stop functioning properly. Not a nice situation. -+ -+This is another reason to use MSI or MSI-X if it's available. -+MSI and MSI-X are defined to be exclusive interrupts and thus -+are not susceptible to the "screaming interrupt" problem. -+ -+ -+4.2 Release the IRQ -+~~~~~~~~~~~~~~~~~~~ -+Once the device is quiesced (no more IRQs), one can call free_irq(). -+This function will return control once any pending IRQs are handled, -+"unhook" the drivers IRQ handler from that IRQ, and finally release -+the IRQ if no one else is using it. -+ -+ -+4.3 Stop all DMA activity -+~~~~~~~~~~~~~~~~~~~~~~~~~ -+It's extremely important to stop all DMA operations BEFORE attempting -+to deallocate DMA control data. Failure to do so can result in memory -+corruption, hangs, and on some chip-sets a hard crash. -+ -+Stopping DMA after stopping the IRQs can avoid races where the -+IRQ handler might restart DMA engines. -+ -+While this step sounds obvious and trivial, several "mature" drivers -+didn't get this step right in the past. -+ -+ -+4.4 Release DMA buffers -+~~~~~~~~~~~~~~~~~~~~~~~ -+Once DMA is stopped, clean up streaming DMA first. -+I.e. unmap data buffers and return buffers to "upstream" -+owners if there is one. -+ -+Then clean up "consistent" buffers which contain the control data. 
-+ -+See Documentation/DMA-API.txt for details on unmapping interfaces. -+ -+ -+4.5 Unregister from other subsystems -+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+Most low level PCI device drivers support some other subsystem -+like USB, ALSA, SCSI, NetDev, Infiniband, etc. Make sure your -+driver isn't losing resources from that other subsystem. -+If this happens, typically the symptom is an Oops (panic) when -+the subsystem attempts to call into a driver that has been unloaded. -+ -+ -+4.6 Disable Device from responding to MMIO/IO Port addresses -+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+io_unmap() MMIO or IO Port resources and then call pci_disable_device(). -+This is the symmetric opposite of pci_enable_device(). -+Do not access device registers after calling pci_disable_device(). -+ -+ -+4.7 Release MMIO/IO Port Resource(s) -+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+Call pci_release_region() to mark the MMIO or IO Port range as available. -+Failure to do so usually results in the inability to reload the driver. -+ -+ -+ -+5. How to access PCI config space -+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+ -+You can use pci_(read|write)_config_(byte|word|dword) to access the config -+space of a device represented by struct pci_dev *. All these functions return 0 -+when successful or an error code (PCIBIOS_...) which can be translated to a text -+string by pcibios_strerror. Most drivers expect that accesses to valid PCI -+devices don't fail. -+ -+If you don't have a struct pci_dev available, you can call -+pci_bus_(read|write)_config_(byte|word|dword) to access a given device -+and function on that bus. -+ -+If you access fields in the standard portion of the config header, please -+use symbolic names of locations and bits declared in . -+ -+If you need to access Extended PCI Capability registers, just call -+pci_find_capability() for the particular capability and it will find the -+corresponding register block for you. -+ -+ -+ -+6. 
Other interesting functions -+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+ -+pci_find_slot() Find pci_dev corresponding to given bus and -+ slot numbers. -+pci_set_power_state() Set PCI Power Management state (0=D0 ... 3=D3) -+pci_find_capability() Find specified capability in device's capability -+ list. -+pci_resource_start() Returns bus start address for a given PCI region -+pci_resource_end() Returns bus end address for a given PCI region -+pci_resource_len() Returns the byte length of a PCI region -+pci_set_drvdata() Set private driver data pointer for a pci_dev -+pci_get_drvdata() Return private driver data pointer for a pci_dev -+pci_set_mwi() Enable Memory-Write-Invalidate transactions. -+pci_clear_mwi() Disable Memory-Write-Invalidate transactions. -+ -+ -+ -+7. Miscellaneous hints -+~~~~~~~~~~~~~~~~~~~~~~ -+ -+When displaying PCI device names to the user (for example when a driver wants -+to tell the user which card it has found), please use pci_name(pci_dev). -+ -+Always refer to the PCI devices by a pointer to the pci_dev structure. -+All PCI layer functions use this identification and it's the only -+reasonable one. Don't use bus/slot/function numbers except for very -+special purposes -- on systems with multiple primary buses their semantics -+can be pretty complex. -+ -+Don't try to turn on Fast Back to Back writes in your driver. All devices -+on the bus need to be capable of doing it, so this is something which needs -+to be handled by platform and generic code, not individual drivers. -+ -+ -+ -+8. Vendor and device identifications -+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+ -+One is not required to add new device ids to include/linux/pci_ids.h. -+Please add PCI_VENDOR_ID_xxx for vendors and a hex constant for device ids. -+ -+PCI_VENDOR_ID_xxx constants are re-used. The device ids are arbitrary -+hex numbers (vendor controlled) and normally used only in a single -+location, the pci_device_id table.
-+ -+Please DO submit new vendor/device ids to pciids.sourceforge.net project. -+ -+ -+ -+9. Obsolete functions -+~~~~~~~~~~~~~~~~~~~~~ -+ -+There are several functions which you might come across when trying to -+port an old driver to the new PCI interface. They are no longer present -+in the kernel as they aren't compatible with hotplug or PCI domains or -+having sane locking. -+ -+pci_find_device() Superseded by pci_get_device() -+pci_find_subsys() Superseded by pci_get_subsys() -+pci_find_slot() Superseded by pci_get_slot() -+ -+ -+The alternative is the traditional PCI device driver that walks PCI -+device lists. This is still possible but discouraged. -+ -+ -+ -+10. MMIO Space and "Write Posting" -+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+ -+Converting a driver from using I/O Port space to using MMIO space -+often requires some additional changes. Specifically, "write posting" -+needs to be handled. Many drivers (e.g. tg3, acenic, sym53c8xx_2) -+already do this. I/O Port space guarantees write transactions reach the PCI -+device before the CPU can continue. Writes to MMIO space allow the CPU -+to continue before the transaction reaches the PCI device. HW weenies -+call this "Write Posting" because the write completion is "posted" to -+the CPU before the transaction has reached its destination. -+ -+Thus, timing sensitive code should add readl() where the CPU is -+expected to wait before doing other work. The classic "bit banging" -+sequence works fine for I/O Port space: -+ -+ for (i = 8; --i; val >>= 1) { -+ outb(val & 1, ioport_reg); /* write bit */ -+ udelay(10); -+ } -+ -+The same sequence for MMIO space should be: -+ -+ for (i = 8; --i; val >>= 1) { -+ writeb(val & 1, mmio_reg); /* write bit */ -+ readb(safe_mmio_reg); /* flush posted write */ -+ udelay(10); -+ } -+ -+It is important that "safe_mmio_reg" not have any side effects that -+interferes with the correct operation of the device. -+ -+Another case to watch out for is when resetting a PCI device. 
Use PCI -+Configuration space reads to flush the writel(). This will gracefully -+handle the PCI master abort on all platforms if the PCI device is -+expected to not respond to a readl(). Most x86 platforms will allow -+MMIO reads to master abort (a.k.a. "Soft Fail") and return garbage -+(e.g. ~0). But many RISC platforms will crash (a.k.a."Hard Fail"). -+ -diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt -new file mode 100644 -index 0000000..16c2512 ---- /dev/null -+++ b/Documentation/PCI/pcieaer-howto.txt -@@ -0,0 +1,253 @@ -+ The PCI Express Advanced Error Reporting Driver Guide HOWTO -+ T. Long Nguyen -+ Yanmin Zhang -+ 07/29/2006 -+ -+ -+1. Overview -+ -+1.1 About this guide -+ -+This guide describes the basics of the PCI Express Advanced Error -+Reporting (AER) driver and provides information on how to use it, as -+well as how to enable the drivers of endpoint devices to conform with -+PCI Express AER driver. -+ -+1.2 Copyright Intel Corporation 2006. -+ -+1.3 What is the PCI Express AER Driver? -+ -+PCI Express error signaling can occur on the PCI Express link itself -+or on behalf of transactions initiated on the link. PCI Express -+defines two error reporting paradigms: the baseline capability and -+the Advanced Error Reporting capability. The baseline capability is -+required of all PCI Express components providing a minimum defined -+set of error reporting requirements. Advanced Error Reporting -+capability is implemented with a PCI Express advanced error reporting -+extended capability structure providing more robust error reporting. -+ -+The PCI Express AER driver provides the infrastructure to support PCI -+Express Advanced Error Reporting capability. The PCI Express AER -+driver provides three basic functions: -+ -+- Gathers the comprehensive error information if errors occurred. -+- Reports error to the users. -+- Performs error recovery actions. 
-+ -+AER driver only attaches to root ports which support PCI-Express AER -+capability. -+ -+ -+2. User Guide -+ -+2.1 Include the PCI Express AER Root Driver into the Linux Kernel -+ -+The PCI Express AER Root driver is a Root Port service driver attached -+to the PCI Express Port Bus driver. If a user wants to use it, the driver -+has to be compiled. Option CONFIG_PCIEAER supports this capability. It -+depends on CONFIG_PCIEPORTBUS, so please set CONFIG_PCIEPORTBUS=y and -+CONFIG_PCIEAER=y. -+ -+2.2 Load PCI Express AER Root Driver -+There is a case where a system has AER support in BIOS. Enabling the AER -+Root driver and having AER support in BIOS may result in unpredictable -+behavior. To avoid this conflict, a successful load of the AER Root driver -+requires ACPI _OSC support in the BIOS to allow the AER Root driver to -+request native control of AER. See the PCI FW 3.0 Specification for -+details regarding OSC usage. Currently, lots of firmwares don't provide -+_OSC support while they use PCI Express. To support such firmwares, -+forceload, a parameter of type bool, could enable AER to continue to -+be initiated although firmwares have no _OSC support. To enable the -+workaround, please add aerdriver.forceload=y to kernel boot parameter line -+when booting kernel. Note that forceload=n by default. -+ -+2.3 AER error output -+When a PCI-E AER error is captured, an error message will be output to -+console. If it's a correctable error, it is output as a warning. -+Otherwise, it is printed as an error. So users could choose different -+log level to filter out correctable error messages. -+ -+Below shows an example.
-++------ PCI-Express Device Error -----+ -+Error Severity : Uncorrected (Fatal) -+PCIE Bus Error type : Transaction Layer -+Unsupported Request : First -+Requester ID : 0500 -+VendorID=8086h, DeviceID=0329h, Bus=05h, Device=00h, Function=00h -+TLB Header: -+04000001 00200a03 05010000 00050100 -+ -+In the example, 'Requester ID' means the ID of the device who sends -+the error message to root port. Pls. refer to pci express specs for -+other fields. -+ -+ -+3. Developer Guide -+ -+To enable AER aware support requires a software driver to configure -+the AER capability structure within its device and to provide callbacks. -+ -+To support AER better, developers need understand how AER does work -+firstly. -+ -+PCI Express errors are classified into two types: correctable errors -+and uncorrectable errors. This classification is based on the impacts -+of those errors, which may result in degraded performance or function -+failure. -+ -+Correctable errors pose no impacts on the functionality of the -+interface. The PCI Express protocol can recover without any software -+intervention or any loss of data. These errors are detected and -+corrected by hardware. Unlike correctable errors, uncorrectable -+errors impact functionality of the interface. Uncorrectable errors -+can cause a particular transaction or a particular PCI Express link -+to be unreliable. Depending on those error conditions, uncorrectable -+errors are further classified into non-fatal errors and fatal errors. -+Non-fatal errors cause the particular transaction to be unreliable, -+but the PCI Express link itself is fully functional. Fatal errors, on -+the other hand, cause the link to be unreliable. -+ -+When AER is enabled, a PCI Express device will automatically send an -+error message to the PCIE root port above it when the device captures -+an error. The Root Port, upon receiving an error reporting message, -+internally processes and logs the error message in its PCI Express -+capability structure. 
Error information being logged includes storing -+the error reporting agent's requestor ID into the Error Source -+Identification Registers and setting the error bits of the Root Error -+Status Register accordingly. If AER error reporting is enabled in Root -+Error Command Register, the Root Port generates an interrupt if an -+error is detected. -+ -+Note that the errors as described above are related to the PCI Express -+hierarchy and links. These errors do not include any device specific -+errors because device specific errors will still get sent directly to -+the device driver. -+ -+3.1 Configure the AER capability structure -+ -+AER aware drivers of PCI Express component need change the device -+control registers to enable AER. They also could change AER registers, -+including mask and severity registers. Helper function -+pci_enable_pcie_error_reporting could be used to enable AER. See -+section 3.3. -+ -+3.2. Provide callbacks -+ -+3.2.1 callback reset_link to reset pci express link -+ -+This callback is used to reset the pci express physical link when a -+fatal error happens. The root port aer service driver provides a -+default reset_link function, but different upstream ports might -+have different specifications to reset pci express link, so all -+upstream ports should provide their own reset_link functions. -+ -+In struct pcie_port_service_driver, a new pointer, reset_link, is -+added. -+ -+pci_ers_result_t (*reset_link) (struct pci_dev *dev); -+ -+Section 3.2.2.2 provides more detailed info on when to call -+reset_link. -+ -+3.2.2 PCI error-recovery callbacks -+ -+The PCI Express AER Root driver uses error callbacks to coordinate -+with downstream device drivers associated with a hierarchy in question -+when performing error recovery actions. -+ -+Data struct pci_driver has a pointer, err_handler, to point to -+pci_error_handlers who consists of a couple of callback function -+pointers. 
AER driver follows the rules defined in -+pci-error-recovery.txt except pci express specific parts (e.g. -+reset_link). Please refer to pci-error-recovery.txt for detailed -+definitions of the callbacks. -+ -+The sections below specify when to call the error callback functions. -+ -+3.2.2.1 Correctable errors -+ -+Correctable errors have no impact on the functionality of -+the interface. The PCI Express protocol can recover without any -+software intervention or any loss of data. These errors do not -+require any recovery actions. The AER driver clears the device's -+correctable error status register accordingly and logs these errors. -+ -+3.2.2.2 Non-correctable (non-fatal and fatal) errors -+ -+If an error message indicates a non-fatal error, performing link reset -+at upstream is not required. The AER driver calls error_detected(dev, -+pci_channel_io_normal) to all drivers associated within a hierarchy in -+question. For example, -+EndPoint<==>DownstreamPort B<==>UpstreamPort A<==>RootPort. -+If Upstream port A captures an AER error, the hierarchy consists of -+Downstream port B and EndPoint. -+ -+A driver may return PCI_ERS_RESULT_CAN_RECOVER, -+PCI_ERS_RESULT_DISCONNECT, or PCI_ERS_RESULT_NEED_RESET, depending on -+whether it can recover or the AER driver calls mmio_enabled as next. -+ -+If an error message indicates a fatal error, kernel will broadcast -+error_detected(dev, pci_channel_io_frozen) to all drivers within -+a hierarchy in question. Then, performing link reset at upstream is -+necessary. As different kinds of devices might use different approaches -+to reset link, AER port service driver is required to provide the -+function to reset link. First, the kernel checks whether the upstream -+component has an aer driver. If it does, kernel uses the reset_link -+callback of the aer driver. If the upstream component has no aer driver -+and the port is downstream port, we will use the aer driver of the -+root port which reports the AER error.
As for upstream ports, -+they should provide their own aer service drivers with reset_link -+function. If error_detected returns PCI_ERS_RESULT_CAN_RECOVER and -+reset_link returns PCI_ERS_RESULT_RECOVERED, the error handling goes -+to mmio_enabled. -+ -+3.3 helper functions -+ -+3.3.1 int pci_find_aer_capability(struct pci_dev *dev); -+pci_find_aer_capability locates the PCI Express AER capability -+in the device configuration space. If the device doesn't support -+PCI-Express AER, the function returns 0. -+ -+3.3.2 int pci_enable_pcie_error_reporting(struct pci_dev *dev); -+pci_enable_pcie_error_reporting enables the device to send error -+messages to root port when an error is detected. Note that devices -+don't enable the error reporting by default, so device drivers need -+to call this function to enable it. -+ -+3.3.3 int pci_disable_pcie_error_reporting(struct pci_dev *dev); -+pci_disable_pcie_error_reporting stops the device from sending error -+messages to root port when an error is detected. -+ -+3.3.4 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev); -+pci_cleanup_aer_uncorrect_error_status cleans up the uncorrectable -+error status register. -+ -+3.4 Frequently Asked Questions -+ -+Q: What happens if a PCI Express device driver does not provide an -+error recovery handler (pci_driver->err_handler is equal to NULL)? -+ -+A: The devices attached with the driver won't be recovered. If the -+error is fatal, kernel will print out warning messages. Please refer -+to section 3 for more information. -+ -+Q: What happens if an upstream port service driver does not provide -+callback reset_link? -+ -+A: Fatal error recovery will fail if the errors are reported by the -+upstream ports who are attached by the service driver. -+ -+Q: How does this infrastructure deal with a driver that is not PCI -+Express aware? -+ -+A: This infrastructure calls the error callback functions of the -+driver when an error happens.
But if the driver is not aware of -+PCI Express, the device might not report its own errors to root -+port. -+ -+Q: What modifications will that driver need to make it compatible -+with the PCI Express AER Root driver? -+ -+A: It could call the helper functions to enable AER in devices and -+cleanup uncorrectable status register. Pls. refer to section 3.3. -+ -diff --git a/Documentation/PCIEBUS-HOWTO.txt b/Documentation/PCIEBUS-HOWTO.txt -deleted file mode 100644 -index c93f42a..0000000 ---- a/Documentation/PCIEBUS-HOWTO.txt -+++ /dev/null -@@ -1,217 +0,0 @@ -- The PCI Express Port Bus Driver Guide HOWTO -- Tom L Nguyen tom.l.nguyen@intel.com -- 11/03/2004 -- --1. About this guide -- --This guide describes the basics of the PCI Express Port Bus driver --and provides information on how to enable the service drivers to --register/unregister with the PCI Express Port Bus Driver. -- --2. Copyright 2004 Intel Corporation -- --3. What is the PCI Express Port Bus Driver -- --A PCI Express Port is a logical PCI-PCI Bridge structure. There --are two types of PCI Express Port: the Root Port and the Switch --Port. The Root Port originates a PCI Express link from a PCI Express --Root Complex and the Switch Port connects PCI Express links to --internal logical PCI buses. The Switch Port, which has its secondary --bus representing the switch's internal routing logic, is called the --switch's Upstream Port. The switch's Downstream Port is bridging from --switch's internal routing bus to a bus representing the downstream --PCI Express link from the PCI Express Switch. -- --A PCI Express Port can provide up to four distinct functions, --referred to in this document as services, depending on its port type. --PCI Express Port's services include native hotplug support (HP), --power management event support (PME), advanced error reporting --support (AER), and virtual channel support (VC). 
These services may --be handled by a single complex driver or be individually distributed --and handled by corresponding service drivers. -- --4. Why use the PCI Express Port Bus Driver? -- --In existing Linux kernels, the Linux Device Driver Model allows a --physical device to be handled by only a single driver. The PCI --Express Port is a PCI-PCI Bridge device with multiple distinct --services. To maintain a clean and simple solution each service --may have its own software service driver. In this case several --service drivers will compete for a single PCI-PCI Bridge device. --For example, if the PCI Express Root Port native hotplug service --driver is loaded first, it claims a PCI-PCI Bridge Root Port. The --kernel therefore does not load other service drivers for that Root --Port. In other words, it is impossible to have multiple service --drivers load and run on a PCI-PCI Bridge device simultaneously --using the current driver model. -- --To enable multiple service drivers running simultaneously requires --having a PCI Express Port Bus driver, which manages all populated --PCI Express Ports and distributes all provided service requests --to the corresponding service drivers as required. Some key --advantages of using the PCI Express Port Bus driver are listed below: -- -- - Allow multiple service drivers to run simultaneously on -- a PCI-PCI Bridge Port device. -- -- - Allow service drivers implemented in an independent -- staged approach. -- -- - Allow one service driver to run on multiple PCI-PCI Bridge -- Port devices. -- -- - Manage and distribute resources of a PCI-PCI Bridge Port -- device to requested service drivers. -- --5. Configuring the PCI Express Port Bus Driver vs. Service Drivers -- --5.1 Including the PCI Express Port Bus Driver Support into the Kernel -- --Including the PCI Express Port Bus driver depends on whether the PCI --Express support is included in the kernel config. 
The kernel will --automatically include the PCI Express Port Bus driver as a kernel --driver when the PCI Express support is enabled in the kernel. -- --5.2 Enabling Service Driver Support -- --PCI device drivers are implemented based on Linux Device Driver Model. --All service drivers are PCI device drivers. As discussed above, it is --impossible to load any service driver once the kernel has loaded the --PCI Express Port Bus Driver. To meet the PCI Express Port Bus Driver --Model requires some minimal changes on existing service drivers that --imposes no impact on the functionality of existing service drivers. -- --A service driver is required to use the two APIs shown below to --register its service with the PCI Express Port Bus driver (see --section 5.2.1 & 5.2.2). It is important that a service driver --initializes the pcie_port_service_driver data structure, included in --header file /include/linux/pcieport_if.h, before calling these APIs. --Failure to do so will result an identity mismatch, which prevents --the PCI Express Port Bus driver from loading a service driver. -- --5.2.1 pcie_port_service_register -- --int pcie_port_service_register(struct pcie_port_service_driver *new) -- --This API replaces the Linux Driver Model's pci_module_init API. A --service driver should always calls pcie_port_service_register at --module init. Note that after service driver being loaded, calls --such as pci_enable_device(dev) and pci_set_master(dev) are no longer --necessary since these calls are executed by the PCI Port Bus driver. -- --5.2.2 pcie_port_service_unregister -- --void pcie_port_service_unregister(struct pcie_port_service_driver *new) -- --pcie_port_service_unregister replaces the Linux Driver Model's --pci_unregister_driver. It's always called by service driver when a --module exits. -- --5.2.3 Sample Code -- --Below is sample service driver code to initialize the port service --driver data structure. 
-- --static struct pcie_port_service_id service_id[] = { { -- .vendor = PCI_ANY_ID, -- .device = PCI_ANY_ID, -- .port_type = PCIE_RC_PORT, -- .service_type = PCIE_PORT_SERVICE_AER, -- }, { /* end: all zeroes */ } --}; -- --static struct pcie_port_service_driver root_aerdrv = { -- .name = (char *)device_name, -- .id_table = &service_id[0], -- -- .probe = aerdrv_load, -- .remove = aerdrv_unload, -- -- .suspend = aerdrv_suspend, -- .resume = aerdrv_resume, --}; -- --Below is a sample code for registering/unregistering a service --driver. -- --static int __init aerdrv_service_init(void) --{ -- int retval = 0; -- -- retval = pcie_port_service_register(&root_aerdrv); -- if (!retval) { -- /* -- * FIX ME -- */ -- } -- return retval; --} -- --static void __exit aerdrv_service_exit(void) --{ -- pcie_port_service_unregister(&root_aerdrv); --} -- --module_init(aerdrv_service_init); --module_exit(aerdrv_service_exit); -- --6. Possible Resource Conflicts -- --Since all service drivers of a PCI-PCI Bridge Port device are --allowed to run simultaneously, below lists a few of possible resource --conflicts with proposed solutions. -- --6.1 MSI Vector Resource -- --The MSI capability structure enables a device software driver to call --pci_enable_msi to request MSI based interrupts. Once MSI interrupts --are enabled on a device, it stays in this mode until a device driver --calls pci_disable_msi to disable MSI interrupts and revert back to --INTx emulation mode. Since service drivers of the same PCI-PCI Bridge --port share the same physical device, if an individual service driver --calls pci_enable_msi/pci_disable_msi it may result unpredictable --behavior. For example, two service drivers run simultaneously on the --same physical Root Port. Both service drivers call pci_enable_msi to --request MSI based interrupts. A service driver may not know whether --any other service drivers have run on this Root Port. 
If either one --of them calls pci_disable_msi, it puts the other service driver --in a wrong interrupt mode. -- --To avoid this situation all service drivers are not permitted to --switch interrupt mode on its device. The PCI Express Port Bus driver --is responsible for determining the interrupt mode and this should be --transparent to service drivers. Service drivers need to know only --the vector IRQ assigned to the field irq of struct pcie_device, which --is passed in when the PCI Express Port Bus driver probes each service --driver. Service drivers should use (struct pcie_device*)dev->irq to --call request_irq/free_irq. In addition, the interrupt mode is stored --in the field interrupt_mode of struct pcie_device. -- --6.2 MSI-X Vector Resources -- --Similar to the MSI a device driver for an MSI-X capable device can --call pci_enable_msix to request MSI-X interrupts. All service drivers --are not permitted to switch interrupt mode on its device. The PCI --Express Port Bus driver is responsible for determining the interrupt --mode and this should be transparent to service drivers. Any attempt --by service driver to call pci_enable_msix/pci_disable_msix may --result unpredictable behavior. Service drivers should use --(struct pcie_device*)dev->irq and call request_irq/free_irq. -- --6.3 PCI Memory/IO Mapped Regions -- --Service drivers for PCI Express Power Management (PME), Advanced --Error Reporting (AER), Hot-Plug (HP) and Virtual Channel (VC) access --PCI configuration space on the PCI Express port. In all cases the --registers accessed are independent of each other. This patch assumes --that all service drivers will be well behaved and not overwrite --other service driver's configuration settings. -- --6.4 PCI Config Registers -- --Each service driver runs its PCI config operations on its own --capability structure except the PCI Express capability structure, in --which Root Control register and Device Control register are shared --between PME and AER. 
This patch assumes that all service drivers --will be well behaved and not overwrite other service driver's --configuration settings. -diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches -index 1fc4e71..9c93a03 100644 ---- a/Documentation/SubmittingPatches -+++ b/Documentation/SubmittingPatches -@@ -183,7 +183,7 @@ Even if the maintainer did not respond in step #4, make sure to ALWAYS - copy the maintainer when you change their code. - - For small patches you may want to CC the Trivial Patch Monkey --trivial@kernel.org managed by Adrian Bunk; which collects "trivial" -+trivial@kernel.org managed by Jesper Juhl; which collects "trivial" - patches. Trivial patches must qualify for one of the following rules: - Spelling fixes in documentation - Spelling fixes which could break grep(1) -@@ -196,7 +196,7 @@ patches. Trivial patches must qualify for one of the following rules: - since people copy, as long as it's trivial) - Any fix by the author/maintainer of the file (ie. patch monkey - in re-transmission mode) --URL: -+URL: - - - -diff --git a/Documentation/arm/Samsung-S3C24XX/NAND.txt b/Documentation/arm/Samsung-S3C24XX/NAND.txt -new file mode 100644 -index 0000000..bc478a3 ---- /dev/null -+++ b/Documentation/arm/Samsung-S3C24XX/NAND.txt -@@ -0,0 +1,30 @@ -+ S3C24XX NAND Support -+ ==================== -+ -+Introduction -+------------ -+ -+Small Page NAND -+--------------- -+ -+The driver uses a 512 byte (1 page) ECC code for this setup. The -+ECC code is not directly compatible with the default kernel ECC -+code, so the driver enforces its own OOB layout and ECC parameters -+ -+Large Page NAND -+--------------- -+ -+The driver is capable of handling NAND flash with a 2KiB page -+size, with support for hardware ECC generation and correction. -+ -+Unlike the 512byte page mode, the driver generates ECC data for -+each 256 byte block in an 2KiB page. This means that more than -+one error in a page can be rectified. 
It also means that the -+OOB layout remains the default kernel layout for these flashes. -+ -+ -+Document Author -+--------------- -+ -+Ben Dooks, Copyright 2007 Simtec Electronics -+ -diff --git a/Documentation/arm/Samsung-S3C24XX/Overview.txt b/Documentation/arm/Samsung-S3C24XX/Overview.txt -index c31b76f..d04e1e3 100644 ---- a/Documentation/arm/Samsung-S3C24XX/Overview.txt -+++ b/Documentation/arm/Samsung-S3C24XX/Overview.txt -@@ -156,6 +156,8 @@ NAND - controller. If there are any problems the latest linux-mtd - code can be found from http://www.linux-mtd.infradead.org/ - -+ For more information see Documentation/arm/Samsung-S3C24XX/NAND.txt -+ - - Serial - ------ -diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt -index 93f223b..4dbb8be 100644 ---- a/Documentation/block/biodoc.txt -+++ b/Documentation/block/biodoc.txt -@@ -1097,7 +1097,7 @@ lock themselves, if required. Drivers that explicitly used the - io_request_lock for serialization need to be modified accordingly. - Usually it's as easy as adding a global lock: - -- static spinlock_t my_driver_lock = SPIN_LOCK_UNLOCKED; -+ static DEFINE_SPINLOCK(my_driver_lock); - - and passing the address to that lock to blk_init_queue(). - -diff --git a/Documentation/braille-console.txt b/Documentation/braille-console.txt -new file mode 100644 -index 0000000..000b0fb ---- /dev/null -+++ b/Documentation/braille-console.txt -@@ -0,0 +1,34 @@ -+ Linux Braille Console -+ -+To get early boot messages on a braille device (before userspace screen -+readers can start), you first need to compile the support for the usual serial -+console (see serial-console.txt), and for braille device (in Device Drivers - -+Accessibility). -+ -+Then you need to specify a console=brl, option on the kernel command line, the -+format is: -+ -+ console=brl,serial_options... -+ -+where serial_options... 
are the same as described in serial-console.txt -+ -+So for instance you can use console=brl,ttyS0 if the braille device is connected -+to the first serial port, and console=brl,ttyS0,115200 to override the baud rate -+to 115200, etc. -+ -+By default, the braille device will just show the last kernel message (console -+mode). To review previous messages, press the Insert key to switch to the VT -+review mode. In review mode, the arrow keys let you browse the VT content, -+page up/down keys go to the top/bottom of the screen, and the home key goes back -+to the cursor, hence providing a very basic screen reviewing facility. -+ -+Sound feedback can be obtained by adding the braille_console.sound=1 kernel -+parameter. -+ -+For simplicity, only one braille console can be enabled, other uses of -+console=brl,... will be discarded. Also note that it does not interfere with -+the console selection mechanism described in serial-console.txt. -+ -+For now, only the VisioBraille device is supported. -+ -+Samuel Thibault -diff --git a/Documentation/cdrom/cdrom-standard.tex b/Documentation/cdrom/cdrom-standard.tex -index c713aeb..c06233f 100644 ---- a/Documentation/cdrom/cdrom-standard.tex -+++ b/Documentation/cdrom/cdrom-standard.tex -@@ -777,7 +777,7 @@ Note that a driver must have one static structure, $_dops$, while - it may have as many structures $_info$ as there are minor devices - active. $Register_cdrom()$ builds a linked list from these. - --\subsection{$Int\ unregister_cdrom(struct\ cdrom_device_info * cdi)$} -+\subsection{$Void\ unregister_cdrom(struct\ cdrom_device_info * cdi)$} - - Unregistering device $cdi$ with minor number $MINOR(cdi\to dev)$ removes - the minor device from the list.
If it was the last registered minor for -diff --git a/Documentation/cgroups.txt b/Documentation/cgroups.txt -index 31d12e2..c298a66 100644 ---- a/Documentation/cgroups.txt -+++ b/Documentation/cgroups.txt -@@ -500,8 +500,7 @@ post-attachment activity that requires memory allocations or blocking. - - void fork(struct cgroup_subsy *ss, struct task_struct *task) - --Called when a task is forked into a cgroup. Also called during --registration for all existing tasks. -+Called when a task is forked into a cgroup. - - void exit(struct cgroup_subsys *ss, struct task_struct *task) - -diff --git a/Documentation/cli-sti-removal.txt b/Documentation/cli-sti-removal.txt -index 0223c9d..60932b0 100644 ---- a/Documentation/cli-sti-removal.txt -+++ b/Documentation/cli-sti-removal.txt -@@ -43,7 +43,7 @@ would execute while the cli()-ed section is executing. - - but from now on a more direct method of locking has to be used: - -- spinlock_t driver_lock = SPIN_LOCK_UNLOCKED; -+ DEFINE_SPINLOCK(driver_lock); - struct driver_data; - - irq_handler (...) -diff --git a/Documentation/controllers/devices.txt b/Documentation/controllers/devices.txt -new file mode 100644 -index 0000000..4dcea42 ---- /dev/null -+++ b/Documentation/controllers/devices.txt -@@ -0,0 +1,48 @@ -+Device Whitelist Controller -+ -+1. Description: -+ -+Implement a cgroup to track and enforce open and mknod restrictions -+on device files. A device cgroup associates a device access -+whitelist with each cgroup. A whitelist entry has 4 fields. -+'type' is a (all), c (char), or b (block). 'all' means it applies -+to all types and all major and minor numbers. Major and minor are -+either an integer or * for all. Access is a composition of r -+(read), w (write), and m (mknod). -+ -+The root device cgroup starts with rwm to 'all'. A child device -+cgroup gets a copy of the parent. Administrators can then remove -+devices from the whitelist or add new entries. 
A child cgroup can -+never receive a device access which is denied its parent. However -+when a device access is removed from a parent it will not also be -+removed from the child(ren). -+ -+2. User Interface -+ -+An entry is added using devices.allow, and removed using -+devices.deny. For instance -+ -+ echo 'c 1:3 mr' > /cgroups/1/devices.allow -+ -+allows cgroup 1 to read and mknod the device usually known as -+/dev/null. Doing -+ -+ echo a > /cgroups/1/devices.deny -+ -+will remove the default 'a *:* mrw' entry. -+ -+3. Security -+ -+Any task can move itself between cgroups. This clearly won't -+suffice, but we can decide the best way to adequately restrict -+movement as people get some experience with this. We may just want -+to require CAP_SYS_ADMIN, which at least is a separate bit from -+CAP_MKNOD. We may want to just refuse moving to a cgroup which -+isn't a descendent of the current one. Or we may want to use -+CAP_MAC_ADMIN, since we really are trying to lock down root. -+ -+CAP_SYS_ADMIN is needed to modify the whitelist or move another -+task to a new cgroup. (Again we'll probably want to change that). -+ -+A cgroup may not be granted more permissions than the cgroup's -+parent has. -diff --git a/Documentation/controllers/resource_counter.txt b/Documentation/controllers/resource_counter.txt -new file mode 100644 -index 0000000..f196ac1 ---- /dev/null -+++ b/Documentation/controllers/resource_counter.txt -@@ -0,0 +1,181 @@ -+ -+ The Resource Counter -+ -+The resource counter, declared at include/linux/res_counter.h, -+is supposed to facilitate the resource management by controllers -+by providing common stuff for accounting. -+ -+This "stuff" includes the res_counter structure and routines -+to work with it. -+ -+ -+ -+1. Crucial parts of the res_counter structure -+ -+ a. unsigned long long usage -+ -+ The usage value shows the amount of a resource that is consumed -+ by a group at a given time. 
The units of measurement should be -+ determined by the controller that uses this counter. E.g. it can -+ be bytes, items or any other unit the controller operates on. -+ -+ b. unsigned long long max_usage -+ -+ The maximal value of the usage over time. -+ -+ This value is useful when gathering statistical information about -+ the particular group, as it shows the actual resource requirements -+ for a particular group, not just some usage snapshot. -+ -+ c. unsigned long long limit -+ -+ The maximal allowed amount of resource to consume by the group. In -+ case the group requests for more resources, so that the usage value -+ would exceed the limit, the resource allocation is rejected (see -+ the next section). -+ -+ d. unsigned long long failcnt -+ -+ The failcnt stands for "failures counter". This is the number of -+ resource allocation attempts that failed. -+ -+ c. spinlock_t lock -+ -+ Protects changes of the above values. -+ -+ -+ -+2. Basic accounting routines -+ -+ a. void res_counter_init(struct res_counter *rc) -+ -+ Initializes the resource counter. As usual, should be the first -+ routine called for a new counter. -+ -+ b. int res_counter_charge[_locked] -+ (struct res_counter *rc, unsigned long val) -+ -+ When a resource is about to be allocated it has to be accounted -+ with the appropriate resource counter (controller should determine -+ which one to use on its own). This operation is called "charging". -+ -+ This is not very important which operation - resource allocation -+ or charging - is performed first, but -+ * if the allocation is performed first, this may create a -+ temporary resource over-usage by the time resource counter is -+ charged; -+ * if the charging is performed first, then it should be uncharged -+ on error path (if the one is called). -+ -+ c. 
void res_counter_uncharge[_locked] -+ (struct res_counter *rc, unsigned long val) -+ -+ When a resource is released (freed) it should be de-accounted -+ from the resource counter it was accounted to. This is called -+ "uncharging". -+ -+ The _locked routines imply that the res_counter->lock is taken. -+ -+ -+ 2.1 Other accounting routines -+ -+ There are more routines that may help you with common needs, like -+ checking whether the limit is reached or resetting the max_usage -+ value. They are all declared in include/linux/res_counter.h. -+ -+ -+ -+3. Analyzing the resource counter registrations -+ -+ a. If the failcnt value constantly grows, this means that the counter's -+ limit is too tight. Either the group is misbehaving and consumes too -+ many resources, or the configuration is not suitable for the group -+ and the limit should be increased. -+ -+ b. The max_usage value can be used to quickly tune the group. One may -+ set the limits to maximal values and either load the container with -+ a common pattern or leave one for a while. After this the max_usage -+ value shows the amount of memory the container would require during -+ its common activity. -+ -+ Setting the limit a bit above this value gives a pretty good -+ configuration that works in most of the cases. -+ -+ c. If the max_usage is much less than the limit, but the failcnt value -+ is growing, then the group tries to allocate a big chunk of resource -+ at once. -+ -+ d. If the max_usage is much less than the limit, but the failcnt value -+ is 0, then this group is given too high limit, that it does not -+ require. It is better to lower the limit a bit leaving more resource -+ for other groups. -+ -+ -+ -+4. Communication with the control groups subsystem (cgroups) -+ -+All the resource controllers that are using cgroups and resource counters -+should provide files (in the cgroup filesystem) to work with the resource -+counter fields. They are recommended to adhere to the following rules: -+ -+ a. 
File names -+ -+ Field name File name -+ --------------------------------------------------- -+ usage usage_in_ -+ max_usage max_usage_in_ -+ limit limit_in_ -+ failcnt failcnt -+ lock no file :) -+ -+ b. Reading from file should show the corresponding field value in the -+ appropriate format. -+ -+ c. Writing to file -+ -+ Field Expected behavior -+ ---------------------------------- -+ usage prohibited -+ max_usage reset to usage -+ limit set the limit -+ failcnt reset to zero -+ -+ -+ -+5. Usage example -+ -+ a. Declare a task group (take a look at cgroups subsystem for this) and -+ fold a res_counter into it -+ -+ struct my_group { -+ struct res_counter res; -+ -+ -+ } -+ -+ b. Put hooks in resource allocation/release paths -+ -+ int alloc_something(...) -+ { -+ if (res_counter_charge(res_counter_ptr, amount) < 0) -+ return -ENOMEM; -+ -+ -+ } -+ -+ void release_something(...) -+ { -+ res_counter_uncharge(res_counter_ptr, amount); -+ -+ -+ } -+ -+ In order to keep the usage value self-consistent, both the -+ "res_counter_ptr" and the "amount" in release_something() should be -+ the same as they were in the alloc_something() when the releasing -+ resource was allocated. -+ -+ c. Provide the way to read res_counter values and set them (the cgroups -+ still can help with it). -+ -+ d. Compile and run :) -diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt -index af3b925..6c442d8 100644 ---- a/Documentation/cpu-freq/user-guide.txt -+++ b/Documentation/cpu-freq/user-guide.txt -@@ -154,6 +154,11 @@ scaling_governor, and by "echoing" the name of another - that some governors won't load - they only - work on some specific architectures or - processors. -+ -+cpuinfo_cur_freq : Current speed of the CPU, in KHz. -+ -+scaling_available_frequencies : List of available frequencies, in KHz. -+ - scaling_min_freq and - scaling_max_freq show the current "policy limits" (in - kHz).
By echoing new values into these -@@ -162,6 +167,15 @@ scaling_max_freq show the current "policy limits" (in - first set scaling_max_freq, then - scaling_min_freq. - -+affected_cpus : List of CPUs that require software coordination -+ of frequency. -+ -+related_cpus : List of CPUs that need some sort of frequency -+ coordination, whether software or hardware. -+ -+scaling_driver : Hardware driver for cpufreq. -+ -+scaling_cur_freq : Current frequency of the CPU, in KHz. - - If you have selected the "userspace" governor which allows you to - set the CPU operating frequency to a specific value, you can read out -diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt -index ad2bb3b..fb7b361 100644 ---- a/Documentation/cpusets.txt -+++ b/Documentation/cpusets.txt -@@ -8,6 +8,7 @@ Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. - Modified by Paul Jackson - Modified by Christoph Lameter - Modified by Paul Menage -+Modified by Hidetoshi Seto - - CONTENTS: - ========= -@@ -20,7 +21,8 @@ CONTENTS: - 1.5 What is memory_pressure ? - 1.6 What is memory spread ? - 1.7 What is sched_load_balance ? -- 1.8 How do I use cpusets ? -+ 1.8 What is sched_relax_domain_level ? -+ 1.9 How do I use cpusets ? - 2. Usage Examples and Syntax - 2.1 Basic Usage - 2.2 Adding/removing cpus -@@ -169,6 +171,7 @@ files describing that cpuset: - - memory_migrate flag: if set, move pages to cpusets nodes - - cpu_exclusive flag: is cpu placement exclusive? - - mem_exclusive flag: is memory placement exclusive? -+ - mem_hardwall flag: is memory allocation hardwalled - - memory_pressure: measure of how much paging pressure in cpuset - - In addition, the root cpuset only has the following file: -@@ -220,17 +223,18 @@ If a cpuset is cpu or mem exclusive, no other cpuset, other than - a direct ancestor or descendent, may share any of the same CPUs or - Memory Nodes. 
- --A cpuset that is mem_exclusive restricts kernel allocations for --page, buffer and other data commonly shared by the kernel across --multiple users. All cpusets, whether mem_exclusive or not, restrict --allocations of memory for user space. This enables configuring a --system so that several independent jobs can share common kernel data, --such as file system pages, while isolating each jobs user allocation in --its own cpuset. To do this, construct a large mem_exclusive cpuset to --hold all the jobs, and construct child, non-mem_exclusive cpusets for --each individual job. Only a small amount of typical kernel memory, --such as requests from interrupt handlers, is allowed to be taken --outside even a mem_exclusive cpuset. -+A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled", -+i.e. it restricts kernel allocations for page, buffer and other data -+commonly shared by the kernel across multiple users. All cpusets, -+whether hardwalled or not, restrict allocations of memory for user -+space. This enables configuring a system so that several independent -+jobs can share common kernel data, such as file system pages, while -+isolating each job's user allocation in its own cpuset. To do this, -+construct a large mem_exclusive cpuset to hold all the jobs, and -+construct child, non-mem_exclusive cpusets for each individual job. -+Only a small amount of typical kernel memory, such as requests from -+interrupt handlers, is allowed to be taken outside even a -+mem_exclusive cpuset. - - - 1.5 What is memory_pressure ? -@@ -497,7 +501,73 @@ the cpuset code to update these sched domains, it compares the new - partition requested with the current, and updates its sched domains, - removing the old and adding the new, for each change. - --1.8 How do I use cpusets ? -+ -+1.8 What is sched_relax_domain_level ? 
-+-------------------------------------- -+ -+In sched domain, the scheduler migrates tasks in 2 ways; periodic load -+balance on tick, and at time of some schedule events. -+ -+When a task is woken up, the scheduler tries to move the task to an idle CPU. -+For example, if a task A running on CPU X activates another task B -+on the same CPU X, and if CPU Y is X's sibling and is idle, -+then the scheduler migrates task B to CPU Y so that task B can start on -+CPU Y without waiting for task A on CPU X. -+ -+And if a CPU runs out of tasks in its runqueue, the CPU tries to pull -+extra tasks from other busy CPUs to help them before it is going to -+be idle. -+ -+Of course it takes some searching cost to find movable tasks and/or -+idle CPUs, so the scheduler might not search all CPUs in the domain -+every time. In fact, in some architectures, the searching ranges on -+events are limited to the same socket or node where the CPU is located, -+while the load balance on tick searches all. -+ -+For example, assume CPU Z is relatively far from CPU X. Even if CPU Z -+is idle while CPU X and the siblings are busy, the scheduler can't migrate -+woken task B from X to Z since it is out of its searching range. -+As a result, task B on CPU X needs to wait for task A or wait for load balance -+on the next tick. For some applications in special situations, waiting -+1 tick may be too long. -+ -+The 'sched_relax_domain_level' file allows you to request changing -+this searching range as you like. This file takes int value which -+indicates size of searching range in levels ideally as follows, -+otherwise initial value -1 that indicates the cpuset has no request. -+ -+ -1 : no request. use system default or follow request of others. -+ 0 : no search. -+ 1 : search siblings (hyperthreads in a core). -+ 2 : search cores in a package.
-+ 3 : search cpus in a node [= system wide on non-NUMA system] -+ ( 4 : search nodes in a chunk of node [on NUMA system] ) -+ ( 5~ : search system wide [on NUMA system]) -+ -+This file is per-cpuset and affects the sched domain where the cpuset -+belongs to. Therefore if the flag 'sched_load_balance' of a cpuset -+is disabled, then 'sched_relax_domain_level' has no effect since -+there is no sched domain belonging to the cpuset. -+ -+If multiple cpusets are overlapping and hence they form a single sched -+domain, the largest value among those is used. Be careful, if one -+requests 0 and others are -1 then 0 is used. -+ -+Note that modifying this file will have both good and bad effects, -+and whether it is acceptable or not will depend on your situation. -+Don't modify this file if you are not sure. -+ -+If your situation is: -+ - The migration costs between each cpu can be assumed considerably -+ small(for you) due to your special application's behavior or -+ special hardware support for CPU cache etc. -+ - The searching cost doesn't have impact(for you) or you can make -+ the searching cost small enough by managing cpuset to compact etc. -+ - The latency is required even if it sacrifices cache hit rate etc. -+then increasing 'sched_relax_domain_level' would benefit you. -+ -+ -+1.9 How do I use cpusets ? - -------------------------- - - In order to minimize the impact of cpusets on critical kernel -@@ -639,7 +709,7 @@ Now you want to do something with this cpuset.
- - In this directory you can find several files: - # ls --cpus cpu_exclusive mems mem_exclusive tasks -+cpus cpu_exclusive mems mem_exclusive mem_hardwall tasks - - Reading them will give you information about the state of this cpuset: - the CPUs and Memory Nodes it can use, the processes that are using -diff --git a/Documentation/debugging-via-ohci1394.txt b/Documentation/debugging-via-ohci1394.txt -index c360d4e..59a91e5 100644 ---- a/Documentation/debugging-via-ohci1394.txt -+++ b/Documentation/debugging-via-ohci1394.txt -@@ -41,15 +41,19 @@ to a working state and enables physical DMA by default for all remote nodes. - This can be turned off by ohci1394's module parameter phys_dma=0. - - The alternative firewire-ohci driver in drivers/firewire uses filtered physical --DMA, hence is not yet suitable for remote debugging. -+DMA by default, which is more secure but not suitable for remote debugging. -+Compile the driver with CONFIG_FIREWIRE_OHCI_REMOTE_DMA (Kernel hacking menu: -+Remote debugging over FireWire with firewire-ohci) to get unfiltered physical -+DMA. - --Because ohci1394 depends on the PCI enumeration to be completed, an --initialization routine which runs pretty early (long before console_init() --which makes the printk buffer appear on the console can be called) was written. -+Because ohci1394 and firewire-ohci depend on the PCI enumeration to be -+completed, an initialization routine which runs pretty early has been -+implemented for x86. This routine runs long before console_init() can be -+called, i.e. before the printk buffer appears on the console. - - To activate it, enable CONFIG_PROVIDE_OHCI1394_DMA_INIT (Kernel hacking menu: --Provide code for enabling DMA over FireWire early on boot) and pass the --parameter "ohci1394_dma=early" to the recompiled kernel on boot. -+Remote debugging over FireWire early on boot) and pass the parameter -+"ohci1394_dma=early" to the recompiled kernel on boot. 
- - Tools - ----- -diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt -new file mode 100644 -index 0000000..6680cab ---- /dev/null -+++ b/Documentation/device-mapper/dm-crypt.txt -@@ -0,0 +1,52 @@ -+dm-crypt -+========= -+ -+Device-Mapper's "crypt" target provides transparent encryption of block devices -+using the kernel crypto API. -+ -+Parameters: -+ -+ -+ Encryption cipher and an optional IV generation mode. -+ (In format cipher-chainmode-ivopts:ivmode). -+ Examples: -+ des -+ aes-cbc-essiv:sha256 -+ twofish-ecb -+ -+ /proc/crypto contains supported crypto modes -+ -+ -+ Key used for encryption. It is encoded as a hexadecimal number. -+ You can only use key sizes that are valid for the selected cipher. -+ -+ -+ The IV offset is a sector count that is added to the sector number -+ before creating the IV. -+ -+ -+ This is the device that is going to be used as backend and contains the -+ encrypted data. You can specify it as a path like /dev/xxx or a device -+ number :. -+ -+ -+ Starting sector within the device where the encrypted data begins. 
-+ -+Example scripts -+=============== -+LUKS (Linux Unified Key Setup) is now the preferred way to set up disk -+encryption with dm-crypt using the 'cryptsetup' utility, see -+http://luks.endorphin.org/ -+ -+[[ -+#!/bin/sh -+# Create a crypt device using dmsetup -+dmsetup create crypt1 --table "0 `blockdev --getsize $1` crypt aes-cbc-essiv:sha256 babebabebabebabebabebabebabebabe 0 $1 0" -+]] -+ -+[[ -+#!/bin/sh -+# Create a crypt device using cryptsetup and LUKS header with default cipher -+cryptsetup luksFormat $1 -+cryptsetup luksOpen $1 crypt1 -+]] -diff --git a/Documentation/dontdiff b/Documentation/dontdiff -index c09a96b..881e6dd 100644 ---- a/Documentation/dontdiff -+++ b/Documentation/dontdiff -@@ -47,7 +47,6 @@ - .mm - 53c700_d.h - 53c8xx_d.h* --BitKeeper - COPYING - CREDITS - CVS -@@ -142,6 +141,7 @@ mkprep - mktables - mktree - modpost -+modules.order - modversions.h* - offset.h - offsets.h -@@ -172,6 +172,7 @@ sm_tbl* - split-include - tags - tftpboot.img -+timeconst.h - times.h* - tkparse - trix_boot.h -diff --git a/Documentation/early-userspace/README b/Documentation/early-userspace/README -index 766d320..e35d830 100644 ---- a/Documentation/early-userspace/README -+++ b/Documentation/early-userspace/README -@@ -89,8 +89,8 @@ the 2.7 era (it missed the boat for 2.5). - You can obtain somewhat infrequent snapshots of klibc from - ftp://ftp.kernel.org/pub/linux/libs/klibc/ - --For active users, you are better off using the klibc BitKeeper --repositories, at http://klibc.bkbits.net/ -+For active users, you are better off using the klibc git -+repository, at http://git.kernel.org/?p=libs/klibc/klibc.git - - The standalone klibc distribution currently provides three components, - in addition to the klibc library: -diff --git a/Documentation/fb/gxfb.txt b/Documentation/fb/gxfb.txt -new file mode 100644 -index 0000000..2f64090 ---- /dev/null -+++ b/Documentation/fb/gxfb.txt -@@ -0,0 +1,52 @@ -+[This file is cloned from VesaFB/aty128fb] -+ -+What is gxfb? 
-+================= -+ -+This is a graphics framebuffer driver for AMD Geode GX2 based processors. -+ -+Advantages: -+ -+ * No need to use AMD's VSA code (or other VESA emulation layer) in the -+ BIOS. -+ * It provides a nice large console (128 cols + 48 lines with 1024x768) -+ without using tiny, unreadable fonts. -+ * You can run XF68_FBDev on top of /dev/fb0 -+ * Most important: boot logo :-) -+ -+Disadvantages: -+ -+ * graphic mode is slower than text mode... -+ -+ -+How to use it? -+============== -+ -+Switching modes is done using gxfb.mode_option=... boot -+parameter or using `fbset' program. -+ -+See Documentation/fb/modedb.txt for more information on modedb -+resolutions. -+ -+ -+X11 -+=== -+ -+XF68_FBDev should generally work fine, but it is non-accelerated. -+ -+ -+Configuration -+============= -+ -+You can pass kernel command line options to gxfb with gxfb.