--- linux/fs/nfs/Makefile.nfsattack-gafton	Wed Jun 24 17:30:10 1998
+++ linux/fs/nfs/Makefile	Fri Feb  4 23:26:34 2000
@@ -9,10 +9,13 @@
 
 O_TARGET := nfs.o
 O_OBJS   := inode.o file.o read.o write.o dir.o symlink.o proc.o \
-	    nfs2xdr.o
+	    nfs2xdr.o cluster.o
 
 ifdef CONFIG_ROOT_NFS
   O_OBJS += nfsroot.o mount_clnt.o
+endif
+ifdef CONFIG_NFS_V3
+  O_OBJS += nfs3proc.o nfs3xdr.o
 endif
 
 M_OBJS   := $(O_TARGET)
--- linux/fs/nfs/cluster.c.nfsattack-gafton	Fri Feb  4 23:26:34 2000
+++ linux/fs/nfs/cluster.c	Fri Feb  4 23:26:34 2000
@@ -0,0 +1,1074 @@
+/*
+ * linux/fs/nfs/cluster.c
+ *
+ * Clustering of file data over NFS.
+ *
+ * The write-back code takes a two-level approach. At the lower level,
+ * there is a request associated with each dirty page.
+ * These pages are grouped in clusters in order to improve write scheduling.
+ * The benefits are:
+ *
+ *  1.  If we write out clusters of pages simultaneously, the write-gathering
+ *      code on the server side can optimize syncing the data (for NFSv2).
+ *  2.  For NFSv3, we can schedule COMMIT calls more intelligently.
+ *  3.  If the write block size is bigger than physical page size, we
+ *      are able to group several pages into one write request.
+ *
+ * Clusters are linked into the inode so that the file sync code et. al.
+ * can conveniently loop over all dirty pages. There is also a hash table
+ * of clusters so that the write() code itself can locate the relevant
+ * cluster fairly quick.
+ *
+ * Each cluster holds a list of requests (struct nfs_page) that represent
+ * the dirty portion of a page. These can be considered as an equivalent
+ * to the buffer heads currently used in the VFS. A request struct holds
+ * a minimal set of information to keep memory overhead low; most of
+ * the state is in fact kept in the cluster.
+ *
+ * For each NFS mount, there is a separate cache object that contains
+ * a hash table of all clusters. With this cache, an async RPC task
+ * (`clusterd') is associated, which wakes up occasionally to inspect
+ * its list of dirty buffers.
+ * (Note that RPC tasks aren't kernel threads. Take a look at the
+ * rpciod code to understand what they are).
+ *
+ * Inside the cache object, we also maintain a count of the current number
+ * of dirty pages, which may not exceed a certain threshold.
+ * (FIXME: This threshold should be configurable).
+ *
+ * The code is streamlined for what I think is the prevalent case for
+ * NFS traffic, which is sequential write access without concurrent
+ * access by different processes.
+ *
+ * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
+ *
+ * Some parts
+ * Copyright (C) 1999, Trond Myklebust <trond.myklebust@fys.uio.no>
+ */
+
+#include <linux/types.h>
+#include <linux/malloc.h>
+#include <linux/pagemap.h>
+#include <linux/file.h>
+
+#include <linux/sched.h>
+
+#include <linux/sunrpc/auth.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/sched.h>
+
+#include <asm/spinlock.h>
+
+#include <linux/nfs.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_fs_sb.h>
+#include <linux/nfs_cluster.h>
+#include <linux/nfs_mount.h>
+
+/*
+ * Various constants
+ */
+#define NFSDBG_FACILITY         NFSDBG_PAGECACHE
+
+/*
+ * RPC wait queue on which the per-mount clusterd tasks sleep between scans
+ */
+static struct rpc_wait_queue    cluster_queue = RPC_INIT_WAITQ("clusterd");
+
+/*
+ * This is the per-mount writeback cache.
+ */
+struct nfs_reqlist {
+	unsigned int		nr_clusters;	/* clusters linked into the table below */
+	unsigned int		nr_requests;	/* requests currently inserted in clusters */
+	unsigned long		runat;		/* jiffies at which the task's sleep times out */
+	struct wait_queue *	request_wait;	/* sleepers waiting to allocate a request */
+
+	/* The async RPC task that is responsible for scanning the
+	 * clusters.
+	 */
+	struct rpc_task		*task;		/* cluster flush task */
+
+	/* Authentication flavor handle for this NFS client */
+	struct rpc_auth *       auth;
+
+	/* The 'hash' table of all clusters.
+	 * Technically, this needn't be a hash table; all we
+	 * want is an even distribution of clusters across
+	 * slots in this table.
+	 */
+	struct nfs_cluster *	clusters[CLUSTER_HASH_SIZE];
+};
+
+/*
+ * Locks used by the write-back code in this file
+ */
+spinlock_t nfs_clusterd_lock = SPIN_LOCK_UNLOCKED;	/* held around updates of cache->task */
+rwlock_t nfs_cluster_lock = RW_LOCK_UNLOCKED;		/* held around cluster list/hash walks and updates */
+rwlock_t nfs_wreq_lock = RW_LOCK_UNLOCKED;		/* read-held while walking cluster->request[] */
+
+/*
+ * Local function declarations.
+ */
+static void                     nfs_clusterd(struct rpc_task *);
+static void                     nfs_clusterd_exit(struct rpc_task *);
+
+/* Start the per-mount clusterd RPC task unless one is already attached. */
+int nfs_reqlist_init(struct nfs_server *server)
+{
+	struct nfs_reqlist	*cache = server->rw_requests;
+	struct rpc_task		*new_task;
+
+	dprintk("NFS: writecache_init\n");
+
+	/* Unlocked fast path; rechecked under the lock below. */
+	if (cache->task != NULL)
+		return 0;
+
+	/* Create the RPC task */
+	new_task = rpc_new_task(server->client, NULL, RPC_TASK_ASYNC);
+	if (new_task == NULL)
+		return -ENOMEM;
+
+	new_task->tk_calldata = server;
+
+	spin_lock(&nfs_clusterd_lock);
+	if (cache->task != NULL) {
+		/* Somebody else attached a task while we were allocating. */
+		spin_unlock(&nfs_clusterd_lock);
+		rpc_release_task(new_task);
+		return 0;
+	}
+	cache->task = new_task;
+
+	/* Put the task to sleep */
+	new_task->tk_timeout = 30 * HZ;
+	cache->runat = jiffies + new_task->tk_timeout;
+	cache->auth = server->client->cl_auth;
+	new_task->tk_action = nfs_clusterd;
+	new_task->tk_exit = nfs_clusterd_exit;
+	spin_unlock(&nfs_clusterd_lock);
+
+	rpc_sleep_on(&cluster_queue, new_task, NULL, NULL);
+	return 0;
+}
+/* Kill the clusterd task of this mount, if any; a mount without a cache is a no-op. */
+void
+nfs_reqlist_exit(struct nfs_server *server)
+{
+	struct nfs_reqlist      *cache = server->rw_requests;
+	struct rpc_task         *task;
+
+	if (!cache)	/* never allocated: nothing to stop */
+		return;
+	task = cache->task;
+	dprintk("NFS: reqlist_exit (ptr %p rpc %p)\n", cache, task);
+	if (task)
+		rpc_kill(task, -EIO);
+}
+/* Allocate the zeroed per-mount request cache; returns 0 or -ENOMEM. */
+int nfs_reqlist_alloc(struct nfs_server *server)
+{
+	struct nfs_reqlist	*reqlist;
+
+	/* Already set up: nothing to do. */
+	if (server->rw_requests != NULL)
+		return 0;
+
+	reqlist = kmalloc(sizeof(*reqlist), GFP_KERNEL);
+	if (reqlist == NULL)
+		return -ENOMEM;
+	memset(reqlist, 0, sizeof(*reqlist));
+	server->rw_requests = reqlist;
+	return 0;
+}
+/* Free the per-mount request cache (if any) and clear the pointer. */
+void nfs_reqlist_free(struct nfs_server *server)
+{
+	if (server->rw_requests == NULL)
+		return;
+	kfree(server->rw_requests);
+	server->rw_requests = NULL;
+}
+
+/*
+ * Return 1 if the byte range of req lies inside a POSIX write lock whose
+ * owner is current->files, 0 otherwise (always 0 on mounts without NLM).
+ */
+static int
+region_locked(struct inode *inode, struct nfs_page *req)
+{
+	struct file_lock	*lock;
+
+	/* Don't optimize writes if we don't use NLM */
+	if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
+		return 0;
+
+	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
+		if (lock->fl_owner != current->files)
+			continue;
+		if (!(lock->fl_flags & FL_POSIX) || lock->fl_type != F_WRLCK)
+			continue;
+		if (lock->fl_start <= req->start && req->end <= lock->fl_end)
+			return 1;
+	}
+	return 0;
+}
+/* Pull the cluster's next scan forward to `time', kicking clusterd if it would sleep past it. */
+void cluster_schedule_scan(struct nfs_cluster *cluster, unsigned long time)
+{
+	struct inode		*inode = cluster->file->f_dentry->d_inode;
+	struct nfs_reqlist	*cache = NFS_REQUESTLIST(inode);
+	struct rpc_task		*task = cache->task;
+
+	/* No scanner yet for this mount: try to start one. */
+	if (task == NULL) {
+		nfs_reqlist_init(NFS_SERVER(inode));
+		task = cache->task;
+	}
+	/* Nothing to do unless the new deadline is earlier than the old one. */
+	if (!time_after(cluster->nextscan, time))
+		return;
+	cluster->nextscan = time;
+	if (time_after(cache->runat, time) && task && task->tk_timeout > 1*HZ)
+		rpc_wake_up_task(task);
+}
+/* Find the cluster of `file' whose aligned start is `base'; a hit gains a reference (count++). */
+static inline struct nfs_cluster *
+find_cluster(struct file *file, __u64 base)
+{
+	struct inode		*inode = file->f_dentry->d_inode;
+	struct nfs_cluster	*first = NFS_CLUSTERS(inode);
+	struct nfs_cluster	*cur = first;
+
+	if (first == NULL)
+		return NULL;
+
+	/* Walk the inode's circular cluster list exactly once. */
+	do {
+		if (cur->file == file && (cur->start & CLUSTER_MASK) == base) {
+			cur->count++;
+			return cur;
+		}
+		cur = CL_NEXT(cur);
+	} while (cur != first);
+
+	return NULL;
+}
+/* Find (and reference) the cluster of `file' whose aligned region contains `page'. */
+static inline struct nfs_cluster *
+find_cluster_page(struct file *file, struct page *page)
+{
+	return find_cluster(file, page->offset & CLUSTER_MASK);
+}
+/* Link a cluster into its inode's list and the per-mount hash chain, taking one reference. */
+static void
+insert_cluster(struct nfs_cluster *cluster, struct inode *inode)
+{
+	struct nfs_reqlist	*cache = NFS_REQUESTLIST(inode);
+	struct nfs_cluster	*tail;
+	unsigned int		slot;
+
+	cluster->count++;
+	rpc_append_list(&NFS_CLUSTERS(inode), cluster);
+
+	/* Append the cluster to the end of its hash chain */
+	slot = CLUSTER_HASH(inode->i_ino, cluster->start);
+	cluster->hash_next = NULL;
+	tail = cache->clusters[slot];
+	if (tail == NULL) {
+		cluster->hash_prev = NULL;
+		cache->clusters[slot] = cluster;
+	} else {
+		while (tail->hash_next != NULL)
+			tail = tail->hash_next;
+		cluster->hash_prev = tail;
+		tail->hash_next = cluster;
+	}
+
+	cache->nr_clusters++;
+}
+
+/*
+ * Return a referenced cluster of `file' for the cluster-aligned region
+ * containing rqstart, creating and inserting one if none exists yet.
+ * May sleep in the allocator; returns NULL if the allocation fails.
+ * (Clusters are created only in update_request.)
+ */
+static struct nfs_cluster *
+create_cluster(struct file *file, __u64 rqstart, __u64 rqend)
+{
+	struct inode		*inode = file->f_dentry->d_inode;
+	struct nfs_cluster	*fresh, *existing;
+
+	dprintk("NFS:      create_cluster(%x/%ld, %ld)\n",
+		inode->i_dev, inode->i_ino,
+		(long) (rqstart & CLUSTER_MASK));
+
+	fresh = (struct nfs_cluster *) rpc_allocate(RPC_TASK_ASYNC, sizeof(*fresh));
+	if (fresh == NULL)
+		return NULL;
+
+	dprintk("NFS:      allocated cluster %p\n", fresh);
+	memset(fresh, 0, sizeof(*fresh));
+
+	fresh->file     = file;
+	fresh->start    = rqstart;
+	fresh->end      = rqend;
+	fresh->sequence = 1;
+	fresh->nextscan = jiffies + NFS_WRITEBACK_LOCKDELAY;
+
+	/*
+	 * We may have slept above, so look again under the write lock
+	 * (which also prevents deletion of clusters) before inserting.
+	 */
+	write_lock(&nfs_cluster_lock);
+	existing = find_cluster(file, rqstart & CLUSTER_MASK);
+	if (existing != NULL) {
+		/* Lost the race: use the cluster that is already there. */
+		write_unlock(&nfs_cluster_lock);
+		rpc_free(fresh);
+		return existing;
+	}
+
+	/* Attach cluster to the inode; it holds a file reference from now on. */
+	file->f_count++;
+	insert_cluster(fresh, inode);
+	write_unlock(&nfs_cluster_lock);
+	return fresh;
+}
+
+/*
+ * Unlink and free an unreferenced cluster (no-op while count != 0)
+ */
+void
+delete_cluster(struct nfs_cluster *cluster)
+{
+	struct inode		*inode = cluster->file->f_dentry->d_inode;
+	struct nfs_reqlist	*cache;
+	struct nfs_cluster	*before, *after;
+	unsigned int		slot;
+
+	dprintk("NFS:      delete_cluster(%p)\n", cluster);
+
+	/* Prevent creation of new clusters */
+	write_lock(&nfs_cluster_lock);
+	if (cluster->count != 0) {
+		/* Somebody still holds a reference; leave it alone. */
+		write_unlock(&nfs_cluster_lock);
+		return;
+	}
+
+	if (cluster->pages)
+		printk(KERN_ERR "NFS: Arrgh! Cluster not empty!\n");
+
+	/* Remove from inode list of clusters */
+	rpc_remove_list(&NFS_CLUSTERS(inode), cluster);
+
+	/* Unlink from the doubly linked hash chain */
+	cache  = NFS_REQUESTLIST(inode);
+	slot   = CLUSTER_HASH(inode->i_ino, cluster->start);
+	before = cluster->hash_prev;
+	after  = cluster->hash_next;
+	if (before != NULL)
+		before->hash_next = after;
+	else
+		cache->clusters[slot] = after;
+	if (after != NULL)
+		after->hash_prev = before;
+	cache->nr_clusters--;
+
+	write_unlock(&nfs_cluster_lock);
+
+	/* Drop the file reference taken at creation, then free the memory */
+	fput(cluster->file);
+	rpc_free(cluster);
+}
+
+
+/*
+ * Find a request on any cluster of the inode that overlaps `page'.
+ * No reference is taken on the request that is returned.
+ */
+static struct nfs_page *
+nfs_find_request(struct inode *inode, struct page *page)
+{
+	struct nfs_cluster	*first, *cur;
+	struct nfs_page		*found = NULL;
+	struct nfs_page		*req;
+	int			slot = REQUEST_NR(page->offset);
+	unsigned long		pgend = page->offset + PAGE_CACHE_SIZE;
+
+	read_lock(&nfs_cluster_lock);
+	first = cur = NFS_CLUSTERS(inode);
+	/* No clusters on this inode */
+	if (first == NULL)
+		goto out;
+
+	/* Loop over all clusters, looking only at this page's slot */
+	do {
+		req = cur->request[slot];
+		if (req && req->start <= pgend && req->end >= page->offset) {
+			found = req;
+			break;
+		}
+		cur = CL_NEXT(cur);
+	} while (cur != first);
+
+out:
+	read_unlock(&nfs_cluster_lock);
+	return found;
+}
+
+/*
+ * Hook a write request into its slot of the cluster; the request holds
+ * a cluster reference and is counted in the per-mount request total.
+ */
+static inline void
+nfs_insert_request(struct nfs_cluster *cluster, struct nfs_page *req)
+{
+	struct inode		*inode = cluster->file->f_dentry->d_inode;
+	struct nfs_page		**slot = &cluster->request[REQUEST_NR(req->start)];
+
+	cluster->count++;
+	if (*slot != NULL)
+		printk(KERN_ERR "NFS: Uh oh! Overwriting existing request!\n");
+	*slot = req;
+	cluster->pages++;
+
+	/* Grow the cluster region to cover the request */
+	if (req->start < cluster->start)
+		cluster->start = req->start;
+	if (cluster->end < req->end)
+		cluster->end = req->end;
+	NFS_REQUESTLIST(inode)->nr_requests++;
+}
+
+/*
+ * Create a write request and insert it into `cluster'; sleeps while over
+ * the hard limit and returns NULL if a signal interrupts that (intr mounts).
+ * Page must be locked by the caller, so that we never create two requests
+ * for the same page and cannot deadlock at the hard limit on dirty pages.
+ */
+static struct nfs_page *
+nfs_create_request(struct nfs_cluster *cluster, struct page *page,
+                        __u64 rqstart, __u64 rqend)
+{
+        struct inode            *inode = cluster->file->f_dentry->d_inode;
+        struct nfs_reqlist      *cache = NFS_REQUESTLIST(inode);
+        struct nfs_page         *req = NULL;
+        signed long             timeout;
+
+        /* Deal with hard/soft limits.
+         * Increment the reference count on the cluster to make sure it
+         * doesn't go away while we sleep.
+         */
+        cluster->count++;
+        atomic_inc(&page->count);	/* page reference kept by the request on success */
+        while (1) {
+                /* If we're over the soft limit, wake up some requests */
+		if (cache->nr_requests >= MAX_REQUEST_SOFT) {
+			dprintk("NFS:      hit soft limit (%d requests)\n",
+                                                        cache->nr_requests);
+			if (cache->task)
+				rpc_wake_up_task(cache->task);
+		}
+
+                /* If we haven't reached the hard limit yet,
+                 * try to allocate the request struct */
+		if (cache->nr_requests < MAX_REQUEST_HARD) {
+			req = (struct nfs_page *) rpc_allocate(RPC_TASK_ASYNC, sizeof(*req));
+			if (req != NULL)
+				break;
+		}
+
+		/* Over the hard limit, or the allocation failed. Wait for better times */
+		dprintk("NFS:      create_request sleeping (total %d pid %d)\n",
+			cache->nr_requests, current->pid);
+		timeout = HZ >> 4;
+		if (NFS_SERVER(inode)->flags & NFS_MOUNT_INTR) {
+			interruptible_sleep_on_timeout(&cache->request_wait,
+						       timeout);
+			if (signalled()) {
+				page_cache_release(page);	/* undo the atomic_inc above */
+				goto done;			/* req is still NULL here */
+			}
+		} else {
+			sleep_on_timeout(&cache->request_wait, timeout);
+		}
+		dprintk("NFS:      create_request waking up (tot %d pid %d)\n",
+			cache->nr_requests, current->pid);
+	}
+
+        /* Initialize the request struct. Initially, we assume a
+         * long write-back delay. This will be adjusted in
+         * nfs_update_request below if the region is not locked. */
+	memset(req, 0, sizeof(*req));
+        req->cluster = cluster;
+        req->page    = page;
+        req->start   = rqstart;
+        req->end     = rqend;
+        req->timeout = jiffies + NFS_WRITEBACK_LOCKDELAY;
+
+        /* Insert the request into the cluster */
+        nfs_insert_request(cluster, req);
+	cluster_schedule_scan(cluster, req->timeout);
+
+done:
+        release_cluster(cluster);	/* drop the reference taken on entry */
+        return req;
+}
+
+
+/*
+ * Try to update any existing write request, or create one if there is none.
+ * Returns 1 with *reqp set on success, 0 with *reqp set to the existing
+ * request when it cannot be merged with (the caller must wait on it), and
+ * -ENOMEM if no cluster or request could be obtained.
+ * Note: Should always be called with the Page Lock held!
+ */
+int
+nfs_update_request(struct file* file, struct page *page,
+		   __u64 offset, __u64 bytes,
+		   struct nfs_page **reqp)
+{
+	struct inode		*inode = file->f_dentry->d_inode;
+	struct nfs_cluster      *cluster;
+	struct nfs_page         *req;
+	__u64                   rqstart, rqend;
+	__u64                   base;
+	int                     status = -ENOMEM;
+
+	base    = page->offset & CLUSTER_MASK;
+	rqstart = page->offset + offset;
+	rqend   = rqstart + bytes;
+
+	/* Loop over all clusters and see if we find
+	 * A request for the page we wish to update
+	 * Note: we don't have to set nfs_wreq_lock, since
+	 *       we already hold the lock on the page itself...
+	 */
+	*reqp = req = nfs_find_request(inode, page);
+
+	if (req) {
+		/* We have a request for our page.
+		 * If the ranges are disjoint, the cluster belongs to
+		 * another file, or the page addresses don't match
+		 * (can that happen?), tell the caller to wait on the
+		 * conflicting request.
+		 */
+		cluster = req->cluster;
+		if (rqstart > req->end || rqend < req->start
+		    || cluster->file != file || req->page != page)
+			return 0;
+
+		cluster->count++;	/* released at `out' below */
+		/* Okay, the request matches. Update the region */
+		if (rqstart < req->start) {
+			if (rqstart < cluster->start)
+				cluster->start = rqstart;
+			req->start = rqstart;
+		}
+		if (req->end < rqend) {
+			if (cluster->end < rqend)
+				cluster->end = rqend;
+			req->end = rqend;
+		}
+		goto out_update;
+	}
+
+	/* No request was found, so we look for a cluster
+	 * of this file covering the page, creating one if needed.
+	 */
+	read_lock(&nfs_cluster_lock);
+	cluster = find_cluster_page(file, page);
+	read_unlock(&nfs_cluster_lock);
+	if (cluster == NULL)
+		cluster = create_cluster(file, rqstart, rqend);
+	if (cluster == NULL)
+		goto out;
+
+	/* Create the request. It's safe to sleep in this call because
+	 * we only get here if the page is already locked.
+	 */
+	*reqp = req = nfs_create_request(cluster, page, rqstart, rqend);
+	if (req == NULL)
+		goto out;
+
+ out_update:
+	status = 1;
+
+	nfs_mark_request_dirty(cluster, req);
+
+	/* If the region is not locked, adjust the timeout */
+	if (!region_locked(inode, req)) {
+		unsigned long   timeout;
+
+		timeout = jiffies + NFS_WRITEBACK_DELAY;
+		if (time_after(req->timeout, timeout))
+			req->timeout = timeout;
+	}
+	cluster_schedule_scan(cluster, req->timeout);
+
+out:
+	release_cluster(cluster);	/* cluster is NULL here if create_cluster failed */
+	return status;
+}
+
+/*
+ * Sleep until `page' is unlocked, holding a page reference meanwhile.
+ * Returns 0 if the page was not locked, else nfs_wait_event()'s result.
+ * Interruptible by signals only if mounted with intr flag.
+ */
+int
+nfs_wait_on_page(struct inode *inode, struct page *page)
+{
+	struct rpc_clnt	*clnt = NFS_CLIENT(inode);
+	int		status = 0;
+
+	if (PageLocked(page)) {
+		atomic_inc(&page->count);
+		status = nfs_wait_event(clnt, page->wait, !PageLocked(page));
+		page_cache_release(page);
+	}
+
+	return status;
+}
+
+/*
+ * Tear down a write request once it has been committed to stable
+ * storage: empty its cluster slot, release the page and free it.
+ *
+ * Note: Should always be called with the spinlock held!
+ */
+void
+nfs_release_request(struct nfs_page *req)
+{
+	struct nfs_cluster	*cluster = req->cluster;
+	struct inode		*inode = cluster->file->f_dentry->d_inode;
+	struct nfs_reqlist	*cache = NFS_REQUESTLIST(inode);
+	struct page		*page = req->page;
+
+	cluster->request[REQUEST_NR(req->start)] = NULL;
+	cluster->pages--;
+	if (page != NULL) {
+		if (req->flags & PG_INVALIDATE_AFTER)
+			clear_bit(PG_uptodate, &page->flags);
+		if (req->flags & PG_UNLOCK_AFTER)
+			nfs_unlock_page(page);
+		page_cache_release(page);
+		req->page = NULL;
+	}
+	rpc_free(req);
+	if (cluster->pages == 0)
+		wake_up(&cluster->wait);	/* last request gone: wake cluster waiters */
+	release_cluster(cluster);
+	/* One request fewer: wake up anyone waiting to allocate a request */
+	cache->nr_requests--;
+	wake_up(&cache->request_wait);
+}
+
+/*
+ * This is the strategy routine for NFS.
+ * It is called by nfs_updatepage whenever the user wrote up to the end
+ * of a page.
+ *
+ * We always try to submit a set of requests in parallel so that the
+ * server's write code can gather writes. This is mainly for the benefit
+ * of NFSv2.
+ *
+ * We never submit more requests than we think the remote can handle.
+ * For UDP sockets, we make sure we don't exceed the congestion window;
+ * for TCP, we limit the number of requests to 8.
+ *
+ * NFS_STRATEGY_PAGES gives the minimum number of requests for NFSv2 that
+ * should be sent out in one go. This is for the benefit of NFSv2 servers
+ * that perform write gathering.
+ *
+ * FIXME: Different servers may have different sweet spots.
+ * Record the average congestion window in server struct?
+ */
+#define NFS_STRATEGY_PAGES      4
+int
+nfs_strategy(struct nfs_cluster *cluster)
+{
+	struct inode	*inode = cluster->file->f_dentry->d_inode;
+	unsigned int	dirty, threshold;
+
+	/*
+	 * If we've reached the end of the cluster, flush everything
+	 * in order to reduce memory usage...
+	 */
+	if ((cluster->end & ~CLUSTER_MASK) == 0)
+		return nfs_sync_file(inode, cluster->file, cluster->start,
+				     cluster->end, FLUSH_WAIT);
+
+	/*
+	 * Otherwise hold back until enough dirty, not yet pending pages
+	 * exist: NFS_STRATEGY_PAGES * wpages for NFSv2, wpages otherwise.
+	 */
+	dirty = cluster->dirty - cluster->pending;
+	threshold = NFS_SERVER(inode)->wpages;
+	if (NFS_PROTO(inode)->version == 2)
+		threshold *= NFS_STRATEGY_PAGES;
+	if (dirty < threshold)
+		return 0;
+	return sync_cluster(cluster, 0);
+}
+
+/*
+ * Flush out all dirty pages. Called for
+ *
+ *  -   fsync, fclose, and when releasing a write lock.
+ *      How is FLUSH_WAIT in this case.
+ *  -   nfs_invalidate_pages.
+ *      How is FLUSH_INVALIDATE|FLUSH_STABLE in this case.
+ *
+ * The start/end arguments specify a file range to flush. To make the
+ * logic simpler, we flush the entire cluster even if it overlaps the
+ * file range only partially.
+ *
+ * The inode must be nfs-locked when we get here, in order to avoid
+ * that new write requests get created while we're busy flushing out
+ * the old ones.
+ */
+int
+nfs_sync_file(struct inode *inode, struct file *file, __u64 start, __u64 end, int how)
+{
+	struct nfs_cluster	*cluster, *last, *head, *waiton;
+	int			error = 0, wait, count;
+
+	wait = how & FLUSH_WAIT;
+	if (wait)
+		how &= ~FLUSH_STABLE;	/* a waiting flush never asks for FLUSH_STABLE */
+
+	if (file && ! inode)
+		inode = file->f_dentry->d_inode;
+
+	/* Do this repeatedly. We may write all pages to the server only
+	 * to find out that it has rebooted when we get around to the
+	 * commit call.
+	 */
+again:
+	read_lock(&nfs_cluster_lock);
+	head = cluster = NFS_CLUSTERS(inode);
+	waiton = last = 0;
+	count = 0;
+
+	if (cluster == NULL) {
+		read_unlock(&nfs_cluster_lock);
+		goto out;
+	}
+
+	head->count++;	/* reference on the list head, dropped at out_nolock */
+
+	do {
+		if (cluster->pages && (file == 0 || file == cluster->file)
+		    && cluster->start < end && start < cluster->end) {
+			/* Cluster overlaps file region.
+			 * Flush it out and remember pointer. */
+			cluster->count++;
+			read_unlock(&nfs_cluster_lock);	/* lock is dropped while flushing */
+			release_cluster(last);
+			last = cluster;
+
+			if (wait && NFS_CONGESTED(inode)) {
+				error = nfs_wait_on_congest(inode);
+				if (error < 0)
+					goto out_nolock;
+			}
+			if (cluster->dirty && cluster->dirty != cluster->pending) {
+				error = sync_cluster(cluster, how);
+				if (error < 0)
+					goto out_nolock;
+			} else if (cluster->pages && !cluster->dirty && !cluster->committing) {
+				error = commit_cluster(cluster, how);
+				if (error < 0)
+					goto out_nolock;
+			}
+			if (wait && cluster->pages
+			    && (cluster->pending || cluster->committing)) {
+				/* Remember (with its own reference) the cluster to wait on */
+				release_cluster(waiton);
+				waiton = cluster;
+				waiton->count++;
+			}
+			read_lock(&nfs_cluster_lock);
+		}
+	} while ((cluster = CL_NEXT(cluster)) && cluster != head);
+	read_unlock(&nfs_cluster_lock);
+
+ out_nolock:
+	release_cluster(head);
+
+	if (error < 0)
+		goto out;
+
+	/* Wait for the last cluster to complete */
+	if (waiton) {
+		error = wait_cluster(waiton);
+		if (error < 0)
+			goto out;
+		release_cluster(waiton);
+		release_cluster(last);
+		goto again;	/* rescan; waiton and last are reset at `again' */
+	}
+
+out:
+	release_cluster(waiton);
+	release_cluster(last);
+	return error;
+}
+
+
+/*
+ * Send all dirty pages of a cluster to the server and lock them.
+ *
+ * When invalidating pages we must make sure every page gets locked
+ * down. Therefore we send even written but uncommitted pages to stable
+ * storage right away.
+ */
+int
+sync_cluster(struct nfs_cluster *cluster, int how)
+{
+	struct nfs_page *req;
+	struct inode	*inode;
+	unsigned int    nr, flushed = 0;
+	struct page	*last;
+	int		wait;
+	int             error = 0;
+
+	cluster->count++;	/* released before returning */
+
+	wait = how & FLUSH_WAIT;
+	if (wait && !(how & FLUSH_SYNC))
+		wait &= ~FLUSH_WAIT;	/* only wait when FLUSH_SYNC is set as well */
+
+	inode = cluster->file->f_dentry->d_inode;
+ repeat:
+	last = NULL;
+	read_lock(&nfs_wreq_lock);
+	for (nr = 0; nr < CLUSTER_PAGES; nr++) {
+		if (!(req = cluster->request[nr]))
+			continue;
+
+		if (how & FLUSH_INVALIDATE)
+			req->flags |= PG_INVALIDATE_AFTER;
+
+		if (!IS_DIRTY(req))
+			continue;
+
+		if (!nfs_lock_page(req)) {
+			/* Lock attempt failed: keep a reference on the most recent such page */
+			atomic_inc(&req->page->count);
+			if (last)
+				page_cache_release(last);
+			last = req->page;
+			continue;
+		}
+		read_unlock(&nfs_wreq_lock);	/* lock is dropped around the flush call */
+		error = nfs_flush_wback(req, how);
+		read_lock(&nfs_wreq_lock);
+		if (error < 0)
+			break;
+
+		flushed++;
+	}
+	read_unlock(&nfs_wreq_lock);
+
+	if (last) {
+		if (wait && error >= 0)
+			error = nfs_wait_on_page(inode, last);
+		page_cache_release(last);
+		if (wait && error >= 0)
+			goto repeat;	/* rescan after waiting for the skipped page */
+	}
+
+	release_cluster(cluster);
+
+	if (error >= 0)
+		error = flushed;	/* success: report the number of requests flushed */
+	return error;
+}
+
+/*
+ * Scan cluster for dirty requests whose timeout has expired and send as
+ * many of them to the server as possible (at most `max' if non-zero).
+ */
+int
+scan_cluster(struct nfs_cluster *cluster, unsigned int max)
+{
+        struct nfs_page	*req;
+        unsigned int	nr, flushed = 0;
+        int		error = 0;
+
+	cluster->count++;
+
+	read_lock(&nfs_wreq_lock);
+        for (nr = 0; nr < CLUSTER_PAGES; nr++) {
+                if (!(req = cluster->request[nr]))
+                        continue;
+		if (time_after(req->timeout, jiffies)) {
+			/* Not due yet: just make sure the next scan is early enough */
+			if (time_after(cluster->nextscan, req->timeout))
+				cluster->nextscan = req->timeout;
+			continue;
+		}
+		if (IS_DIRTY(req) && nfs_lock_page(req)) {
+			read_unlock(&nfs_wreq_lock);
+                        error = nfs_flush_wback(req, FLUSH_AGING);
+			read_lock(&nfs_wreq_lock);
+			if (error < 0)
+				break;
+			if (max && ++flushed >= max)
+				break;
+		}
+	}
+	read_unlock(&nfs_wreq_lock);
+	if (error >= 0)
+		error = flushed;
+	if (!cluster->dirty && !cluster->committing) {
+		int	status = commit_cluster(cluster, 0);
+		if (error >= 0)		/* never hide an earlier flush failure */
+			error = status;
+	}
+        release_cluster(cluster);
+        return error;	/* commit result or flush count; negative on failure */
+}
+
+/*
+ * Wait until the cluster has neither pending writes nor a commit in
+ * progress. Returns 0 at once if idle, else nfs_wait_event()'s result.
+ */
+int
+wait_cluster(struct nfs_cluster *cluster)
+{
+	struct inode		*inode = cluster->file->f_dentry->d_inode;
+	struct rpc_clnt		*clnt = NFS_CLIENT(inode);
+	int			status = 0;
+
+	if (cluster->pending || cluster->committing) {
+		/* Hold a reference across the sleep */
+		cluster->count++;
+		status = nfs_wait_event(clnt, cluster->wait, !cluster->pending && !cluster->committing);
+		release_cluster(cluster);
+	}
+
+	return status;
+}
+
+/*
+ * This is the cluster scanner.
+ *  -   If there are pages due to be written, send them out now.
+ *  -   If the total number of dirty pages has exceeded the soft
+ *      limit, sync the cluster now.
+ *  -   If the user has written past the end of the cluster, we
+ *      also flush the remaining dirty pages and initiate a commit
+ *      call.
+ */
+static void
+nfs_clusterd(struct rpc_task *task)
+{
+	struct nfs_server	*server;
+        struct nfs_reqlist	*cache;
+        struct nfs_cluster	*cluster, *previous;
+	struct inode		*inode = 0;
+        long			delay = NFS_WRITEBACK_LOCKDELAY;
+        unsigned int		hand;
+
+        dprintk("NFS: %4d clusterd starting\n", task->tk_pid);
+	server = (struct nfs_server *) task->tk_calldata;
+        cache = server->rw_requests;
+
+
+	if (!cache->nr_requests && !cache->nr_clusters)
+		goto out;	/* nothing cached: skip the scan */
+
+        for(hand = 0; hand < CLUSTER_HASH_SIZE; hand++) {
+		read_lock(&nfs_cluster_lock);
+		cluster  = cache->clusters[hand];
+		previous = 0;
+		while (cluster) {
+			cluster->count++;	/* reference held while the lock is dropped */
+			read_unlock(&nfs_cluster_lock);
+
+			if (!cluster->pages)
+				goto release;
+
+			/* NOTE(review): inode is assigned but not read again in this function */
+			inode = cluster->file->f_dentry->d_inode;
+			if (cluster->committing && !cluster->dirty)
+				goto release;
+
+			if (cluster->dirty == cluster->pending && cluster->dirty)
+				goto release;
+
+			if (cache->nr_requests >= MAX_REQUEST_SOFT) {
+				/* Over the soft limit: commit or flush without waiting for timeouts */
+				if (!cluster->dirty) {
+					commit_cluster(cluster, 0);
+				} else
+					sync_cluster(cluster, 0);
+			} else if (time_after(jiffies, cluster->nextscan)) {
+				cluster->nextscan = jiffies + NFS_WRITEBACK_LOCKDELAY;
+				scan_cluster(cluster, 0);
+			}
+
+			/* Shorten our sleep to the earliest nextscan seen */
+			if (time_after(jiffies + delay, cluster->nextscan))
+				delay = cluster->nextscan - jiffies;
+
+release:
+			release_cluster(previous);
+			read_lock(&nfs_cluster_lock);
+			previous = cluster;
+			cluster  = cluster->hash_next;
+		}
+		read_unlock(&nfs_cluster_lock);
+		release_cluster(previous);
+	}
+
+	dprintk("NFS: %4d clusterd back to sleep\n", task->tk_pid);
+	if (delay < 1 * HZ)
+		delay = 1 * HZ;	/* lower bound on the sleep (also covers delay <= 0) */
+out:
+	if (!cache->nr_requests && !cache->nr_clusters) {
+		/* Idle: detach from the cache and return without rescheduling */
+		spin_lock(&nfs_clusterd_lock);
+		cache->task = NULL;
+		task->tk_action = NULL;
+		spin_unlock(&nfs_clusterd_lock);
+		return;
+	}
+
+	task->tk_status = 0;
+	task->tk_action = nfs_clusterd;
+	task->tk_timeout = delay;
+	cache->runat = jiffies + task->tk_timeout;
+	rpc_sleep_on(&cluster_queue, task, NULL, NULL);
+}
+
+/* tk_exit callback: detach the exiting clusterd task from its mount's cache. */
+static void
+nfs_clusterd_exit(struct rpc_task *task)
+{
+	struct nfs_server	*server = (struct nfs_server *) task->tk_calldata;
+	struct nfs_reqlist	*cache = server->rw_requests;
+
+	spin_lock(&nfs_clusterd_lock);
+	/* Leave the pointer alone if a different task has been installed. */
+	if (cache->task == task)
+		cache->task = NULL;
+	spin_unlock(&nfs_clusterd_lock);
+}
+
+/*
+ * Sleep on the transport's congestion queue until the inode's mount is
+ * no longer congested; returns 0 at once when it is not congested.
+ * Interruptible by signals only if mounted with intr flag.
+ */
+int
+nfs_wait_on_congest(struct inode *inode)
+{
+	struct rpc_clnt	*clnt = NFS_CLIENT(inode);
+	int		status = 0;
+
+	if (NFS_CONGESTED(inode))
+		status = nfs_wait_event(clnt, clnt->cl_xprt->cong_wait, !NFS_CONGESTED(inode));
+
+	return status;
+}
--- linux/fs/nfs/dir.c.nfsattack-gafton	Tue Jan  4 13:12:23 2000
+++ linux/fs/nfs/dir.c	Fri Feb  4 23:26:34 2000
@@ -14,6 +14,9 @@
  *              Following Linus comments on my original hack, this version
  *              depends only on the dcache stuff and doesn't touch the inode
  *              layer (iput() and friends).
+ *  6 Jun 1999  Cache readdir lookups in the page cache. -DaveM
+ *  7 Oct 1999  Rewrite of Dave's readdir stuff for NFSv3 support, and in order
+ *              to simplify cookie handling. -Trond
  */
 
 #include <linux/sched.h>
@@ -25,32 +28,24 @@
 #include <linux/malloc.h>
 #include <linux/mm.h>
 #include <linux/sunrpc/types.h>
+#include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs3.h>
 #include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/sunrpc/auth.h>
+#include <linux/sunrpc/clnt.h>
 
 #include <asm/segment.h>	/* for fs functions */
 
 #define NFS_PARANOIA 1
 /* #define NFS_DEBUG_VERBOSE 1 */
 
-/*
- * Head for a dircache entry. Currently still very simple; when
- * the cache grows larger, we will need a LRU list.
- */
-struct nfs_dirent {
-	dev_t			dev;		/* device number */
-	ino_t			ino;		/* inode number */
-	u32			cookie;		/* cookie of first entry */
-	unsigned short		valid  : 1,	/* data is valid */
-				locked : 1;	/* entry locked */
-	unsigned int		size;		/* # of entries */
-	unsigned long		age;		/* last used */
-	unsigned long		mtime;		/* last attr stamp */
-	struct wait_queue *	wait;
-	__u32 *			entry;		/* three __u32's per entry */
-};
-
 static int nfs_safe_remove(struct dentry *);
+static int _nfs_safe_remove(struct dentry *, struct rpc_cred *);
 
+static int nfs_dir_open(struct inode *, struct file *);
+static int nfs_dir_release(struct inode *, struct file *);
 static ssize_t nfs_dir_read(struct file *, char *, size_t, loff_t *);
 static int nfs_readdir(struct file *, void *, filldir_t);
 static struct dentry *nfs_lookup(struct inode *, struct dentry *);
@@ -72,9 +67,9 @@
 	NULL,			/* select - default */
 	NULL,			/* ioctl - default */
 	NULL,			/* mmap */
-	nfs_open,		/* open */
+	nfs_dir_open,		/* open */
 	NULL,			/* flush */
-	nfs_release,		/* release */
+	nfs_dir_release,	/* release */
 	NULL			/* fsync */
 };
 
@@ -101,267 +96,444 @@
 	nfs_revalidate,		/* revalidate */
 };
 
+static int
+nfs_dir_open(struct inode *dir, struct file *filp)	/* "open" slot of the ops table above */
+{
+	struct dentry		*dentry = filp->f_dentry;
+	struct rpc_auth		*auth = NFS_CLIENT(dir)->cl_auth;
+	struct nfs_file		*data;
+	struct nfs_fattr	fattr;
+	int			error = 0;
+
+	dfprintk(VFS, "NFS: nfs_dir_open(%s/%s)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name);
+
+	/* Skip ACCESS when uid/gid differ from fsuid/fsgid (e.g. setuid processes) */
+	if (current->uid != current->fsuid || current->gid != current->fsgid)
+		goto lookup_cred;
+
+	/* Protocols that provide ->access (NFSv3) let us check MAY_READ on open */
+	if (NFS_PROTO(dir)->access) {
+		error = NFS_CALL(access, dir, (dentry, MAY_READ, &fattr));
+		nfs_refresh_inode(dir, &fattr);	/* NOTE(review): runs even if ACCESS failed -- confirm fattr is valid then */
+	}
+
+	if (error < 0)
+		goto out;
+
+ lookup_cred:
+	data = nfs_file_alloc();
+	if (data) {
+		data->cred = rpcauth_lookupcred(auth, 0);	/* presumably the cred nfs_dir_release() drops -- confirm */
+		filp->private_data = data;
+	} else
+		error = -ENOMEM;	/* nfs_file_alloc() returned NULL */
+ out:
+	return error;
+}
+
+static int
+nfs_dir_release(struct inode *dir, struct file *filp)	/* "release" slot of the ops table above */
+{
+	struct rpc_auth		*auth = NFS_CLIENT(dir)->cl_auth;
+	struct rpc_cred		*cred;
+
+	cred = nfs_file_cred(filp);	/* presumably the cred stored by nfs_dir_open() -- confirm */
+	if (cred)
+		rpcauth_releasecred(auth, cred);
+	nfs_file_free(filp->private_data);	/* per-open data set up in nfs_dir_open() */
+	return 0;	/* always reports success */
+}
+
 static ssize_t
 nfs_dir_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
 {
 	return -EISDIR;
 }
 
-static struct nfs_dirent	dircache[NFS_MAX_DIRCACHE];
+typedef u32 * (*decode_dirent_t)(u32 *, struct nfs_entry *, int);
 
 /*
- * We need to do caching of directory entries to prevent an
- * incredible amount of RPC traffic.  Only the most recent open
- * directory is cached.  This seems sufficient for most purposes.
- * Technically, we ought to flush the cache on close but this is
- * not a problem in practice.
+ * Given a pointer to a buffer that has already been filled by a call
+ * to readdir, find the next entry.
  *
- * XXX: Do proper directory caching by stuffing data into the
- * page cache (may require some fiddling for rsize < PAGE_SIZE).
+ * If the end of the buffer has been reached, return -EAGAIN, if not,
+ * return the offset within the buffer of the next entry to be
+ * read.
  */
+static inline
+long find_dirent(struct page *page, unsigned long offset,
+		 struct nfs_entry *entry,
+		 decode_dirent_t decode, int plus)
+{
+	u8		*p = (u8 *)page_address(page),
+			*start = p;
+	unsigned long	base = page->offset,
+			pg_offset = 0;
+	int		loop_count = 0;
+
+	if (!p)
+		return -EIO;
+	for(;;) {
+		p = (u8*)decode((__u32*)p, entry, plus);
+		if (IS_ERR(p))
+			break;
+		pg_offset = p - start;
+		entry->prev = entry->offset;
+		entry->offset = base + pg_offset;
+		if (entry->offset > offset)
+			break;
+		if (loop_count++ > 200) {
+			loop_count = 0;
+			schedule();
+		}
+	}
 
-static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
-{
-	struct dentry 		*dentry = filp->f_dentry;
-	struct inode 		*inode = dentry->d_inode;
-	static struct wait_queue *readdir_wait = NULL;
-	struct wait_queue	**waitp = NULL;
-	struct nfs_dirent	*cache, *free;
-	unsigned long		age, dead;
-	u32			cookie;
-	int			ismydir, result;
-	int			i, j, index = 0;
-	__u32			*entry;
-	char			*name, *start;
-
-	dfprintk(VFS, "NFS: nfs_readdir(%s/%s)\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name);
+	return (IS_ERR(p)) ?  PTR_ERR(p) : (long)pg_offset;
+}
 
-	result = nfs_revalidate_inode(NFS_DSERVER(dentry), dentry);
-	if (result < 0)
+/*
+ * Find the given page, and call find_dirent() in order to try to
+ * return the next entry.
+ *
+ * Returns -EIO if the page is not available, or is not up to date.
+ */
+static inline
+long find_dirent_page(struct inode *inode, unsigned long offset,
+		      struct nfs_entry *entry)
+{
+	decode_dirent_t	decode = NFS_PROTO(inode)->decode_dirent;
+	struct page	*page;
+	long		status = -EIO;
+	int		plus = NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS;
+
+	dfprintk(VFS, "NFS: find_dirent_page() searching directory page @ offset %ld\n", offset);
+	page = find_page(inode, entry->offset & PAGE_CACHE_MASK);
+	if (!page)
 		goto out;
 
-	/*
-	 * Try to find the entry in the cache
-	 */
-again:
-	if (waitp) {
-		interruptible_sleep_on(waitp);
-		if (signal_pending(current))
-			return -ERESTARTSYS;
-		waitp = NULL;
-	}
+	if (PageUptodate(page))
+		status = find_dirent(page, offset, entry, decode, plus);
 
-	cookie = filp->f_pos;
-	entry  = NULL;
-	free   = NULL;
-	age    = ~(unsigned long) 0;
-	dead   = jiffies - NFS_ATTRTIMEO(inode);
+	/* NB: on successful return we will be holding the page */
+	if (status >= 0) {
+		if (entry->page)
+			page_cache_release(entry->page);
+		entry->page = page;
+	} else {
+		entry->page = NULL;
+		page_cache_release(page);
+	}
+
+ out:
+	dfprintk(VFS, "NFS: find_dirent_page() returns %ld\n", status);
+	return status;
+}
 
-	for (i = 0, cache = dircache; i < NFS_MAX_DIRCACHE; i++, cache++) {
-		/*
-		dprintk("NFS: dircache[%d] valid %d locked %d\n",
-					i, cache->valid, cache->locked);
-		 */
-		ismydir = (cache->dev == inode->i_dev
-				&& cache->ino == inode->i_ino);
-		if (cache->locked) {
-			if (!ismydir || cache->cookie != cookie)
-				continue;
-			dfprintk(DIRCACHE, "NFS: waiting on dircache entry\n");
-			waitp = &cache->wait;
-			goto again;
+
+/*
+ * Walk through the page cache pages, and return a
+ * filled nfs_entry structure of the next directory entry if possible.
+ *
+ * We start the search at position 'offset'.
+ */
+static inline
+long search_cached_dirent_pages(struct inode *inode, unsigned long offset,
+				struct nfs_entry *entry)
+{
+	long		res = 0;
+	int		loop_count = 0;
+
+	dfprintk(VFS, "NFS: search_cached_dirent_pages() searching @ offset %ld\n", offset);
+	for (;;) {
+		res = find_dirent_page(inode, offset, entry);
+		if (res == -EAGAIN) {
+			/* Align to beginning of next page */
+			entry->offset &= PAGE_CACHE_MASK;
+			entry->offset += PAGE_CACHE_SIZE;
+		}
+		if (res != -EAGAIN)
+			break;
+		if (loop_count++ > 200) {
+			loop_count = 0;
+			schedule();
 		}
+	}
+	if (res < 0 && entry->page) {
+		page_cache_release(entry->page);
+		entry->page = NULL;
+	}
+	dfprintk(VFS, "NFS: search_cached_dirent_pages() returned %ld\n", res);
+	return res;
+}
 
-		if (ismydir && cache->mtime != inode->i_mtime)
-			cache->valid = 0;
 
-		if (!cache->valid || cache->age < dead) {
-			free = cache;
-			age  = 0;
-		} else if (cache->age < age) {
-			free = cache;
-			age  = cache->age;
-		}
+/* Now we cache directories properly, by stuffing the dirent
+ * data directly in the page cache.
+ *
+ * Inode invalidation due to refresh etc. takes care of
+ * _everything_, no sloppy entry flushing logic, no extraneous
+ * copying, network direct to page cache, the way it was meant
+ * to be.
+ *
+ * NOTE: Dirent information verification is done always by the
+ *	 page-in of the RPC reply, nowhere else, this simplifies
+ *	 things substantially.
+ */
+static inline
+long try_to_get_dirent_page(struct file *filp, struct inode *inode,
+			    unsigned long offset, __u64 cookie)
+{
+	struct dentry	*dir = filp->f_dentry;
+	struct page	*page;
+	struct rpc_cred	*cred = nfs_file_cred(filp);
+	struct nfs_fattr dir_attr;
+	long		res = 0;
+	int		plus = NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS;
+
+	offset &= PAGE_CACHE_MASK;
+	dfprintk(VFS, "NFS: try_to_get_dirent_page() reading directory page @ offset %ld\n", offset);
+	page = nfs_find_lock_page(inode, offset);
 
-		if (!ismydir || !cache->valid)
-			continue;
+	if (!page) {
+		res = -ENOMEM;
+		goto out;
+	}
 
-		if (cache->cookie == cookie && cache->size > 0) {
-			entry = cache->entry + (index = 0);
-			cache->locked = 1;
-			break;
+	if (!PageUptodate(page)) {
+		unsigned int	dtsize = NFS_SERVER(inode)->dtsize;
+		__u32		*p = (__u32 *)page_address(page);
+
+		if (dtsize > PAGE_CACHE_SIZE)
+			dtsize = PAGE_CACHE_SIZE;
+		res = NFS_CALL(readdir, inode, (dir, &dir_attr, cred,
+						cookie, p, dtsize, plus));
+		if (res < 0)
+			goto error;
+		nfs_refresh_inode(inode, &dir_attr);
+		if (PageError(page))
+			clear_bit(PG_error, &page->flags);
+		set_bit(PG_uptodate, &page->flags);
+	} else
+		dfprintk(VFS, "NFS: try_to_get_dirent_page(): page already up to date.\n");
+ unlock_out:
+	clear_bit(PG_locked, &page->flags);
+	wake_up(&page->wait);
+	page_cache_release(page);
+ out:
+	dfprintk(VFS, "NFS: try_to_get_dirent_page() returns %ld\n", res);
+	return res;
+
+ error:
+	set_bit(PG_error, &page->flags);
+	goto unlock_out;
+}
+
+/* Recover from a revalidation flush.  The case here is that
+ * the inode for the directory got invalidated somehow, and
+ * all of our cached information is lost.  In order to get
+ * a correct cookie for the current readdir request from the
+ * user, we must (re-)fetch all the older readdir page cache
+ * entries.
+ *
+ * Returns < 0 if some error occurs.
+ */
+static inline
+long refetch_to_readdir(struct file *filp, struct inode *inode,
+			unsigned long off, struct nfs_entry *entry)
+{
+	struct nfs_entry	my_dirent,
+				*dirent = &my_dirent;
+	long			res;
+	int			plus = NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS,
+				loop_count = 0;
+
+	dfprintk(VFS, "NFS: refetch_to_readdir() searching @ offset %ld\n", off);
+	*dirent = *entry;
+
+	for (res = 0;res >= 0 && !dirent->eof;) {
+
+		if (loop_count++ > 200) {
+			loop_count = 0;
+			schedule();
 		}
-		for (j = 0; j < cache->size; j++) {
-			__u32 *this_ent = cache->entry + j*3;
 
-			if (*(this_ent+1) != cookie)
-				continue;
-			if (j < cache->size - 1) {
-				index = j + 1;
-				entry = this_ent + 3;
-			} else if (*(this_ent+2) & (1 << 15)) {
-				/* eof */
-				return 0;
+		/* Search for last cookie in page cache */
+		res = search_cached_dirent_pages(inode, off, dirent);
+
+		if (res >= 0) {
+			/* Cookie was found */
+			if (dirent->offset >= off) {
+				*entry = *dirent;
+				break;
 			}
-			break;
+			if (dirent->page) {
+				page_cache_release(dirent->page);
+				dirent->page = NULL;
+			}
+			if (dirent->eof) {
+				res = -EBADCOOKIE;
+				*entry = *dirent;
+				break;
+			}
+			continue;
 		}
-		if (entry) {
-			dfprintk(DIRCACHE, "NFS: found dircache entry %d\n",
-						(int)(cache - dircache));
-			cache->locked = 1;
+
+		if (res != -EIO) {
+			*entry = *dirent;
 			break;
 		}
-	}
 
-	/*
-	 * Okay, entry not present in cache, or locked and inaccessible.
-	 * Set up the cache entry and attempt a READDIR call.
-	 */
-	if (entry == NULL) {
-		if ((cache = free) == NULL) {
-			dfprintk(DIRCACHE, "NFS: dircache contention\n");
-			waitp = &readdir_wait;
-			goto again;
+		/* Read in a new page */
+		res = try_to_get_dirent_page(filp, inode, dirent->offset, dirent->cookie);
+		/* Is the entry->cookie invalid? */
+		if (NFS_FILE_READTIME(filp) != NFS_CACHE_MTIME(inode)) {
+			memset(dirent, 0, sizeof(*dirent));
+			NFS_FILE_READTIME(filp) = NFS_CACHE_MTIME(inode);
+			off = 0;
 		}
-		dfprintk(DIRCACHE, "NFS: using free dircache entry %d\n",
-				(int)(free - dircache));
-		cache->cookie = cookie;
-		cache->locked = 1;
-		cache->valid  = 0;
-		cache->dev    = inode->i_dev;
-		cache->ino    = inode->i_ino;
-		if (!cache->entry) {
-			result = -ENOMEM;
-			cache->entry = (__u32 *) get_free_page(GFP_KERNEL);
-			if (!cache->entry)
-				goto done;
+		if (res == -EBADCOOKIE) {
+			memset(dirent, 0, sizeof(*dirent));
+			off = 0;
+			nfs_zap_caches(inode);
+			res = 0;
+		}
+		/* We requested READDIRPLUS, but the server doesn't grok it */
+		if (res == -ENOTSUPP && plus) {
+			NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS;
+			plus = 0;
+			memset(dirent, 0, sizeof(*dirent));
+			off = 0;
+			nfs_zap_caches(inode);
+			res = 0;
 		}
-
-		result = nfs_proc_readdir(NFS_SERVER(inode), NFS_FH(dentry),
-					cookie, PAGE_SIZE, cache->entry);
-		if (result <= 0)
-			goto done;
-		cache->size  = result;
-		cache->valid = 1;
-		entry = cache->entry + (index = 0);
-	}
-	cache->mtime = inode->i_mtime;
-	cache->age = jiffies;
-
-	/*
-	 * Yowza! We have a cache entry...
-	 */
-	start = (char *) cache->entry;
-	while (index < cache->size) {
-		__u32	fileid  = *entry++;
-		__u32	nextpos = *entry++; /* cookie */
-		__u32	length  = *entry++;
-
-		/*
-		 * Unpack the eof flag, offset, and length
-		 */
-		result = length & (1 << 15); /* eof flag */
-		name = start + ((length >> 16) & 0xFFFF);
-		length &= 0x7FFF;
-		/*
-		dprintk("NFS: filldir(%p, %.*s, %d, %d, %x, eof %x)\n", entry,
-				(int) length, name, length,
-				(unsigned int) filp->f_pos,
-				fileid, result);
-		 */
-
-		if (filldir(dirent, name, length, cookie, fileid) < 0)
-			break;
-		cookie = nextpos;
-		index++;
 	}
-	filp->f_pos = cookie;
-	result = 0;
-
-	/* XXX: May want to kick async readdir-ahead here. Not too hard
-	 * to do. */
 
-done:
-	dfprintk(DIRCACHE, "NFS: nfs_readdir complete\n");
-	cache->locked = 0;
-	wake_up(&cache->wait);
-	wake_up(&readdir_wait);
-
-out:
-	return result;
+	dfprintk(VFS, "NFS: refetch_to_readdir() returns %ld\n", res);
+	return res;
 }
 
 /*
- * Invalidate dircache entries for an inode.
+ * Once we've found the start of the dirent within a page: fill 'er up...
  */
-void
-nfs_invalidate_dircache(struct inode *inode)
+static
+int nfs_do_filldir(struct file *filp, struct inode *inode,
+		   struct nfs_entry *entry, void *dirent, filldir_t filldir)
 {
-	struct nfs_dirent *cache = dircache;
-	dev_t		dev = inode->i_dev;
-	ino_t		ino = inode->i_ino;
-	int		i;
-
-	dfprintk(DIRCACHE, "NFS: invalidate dircache for %x/%ld\n", dev, (long)ino);
-	for (i = NFS_MAX_DIRCACHE; i--; cache++) {
-		if (cache->ino != ino)
-			continue;
-		if (cache->dev != dev)
-			continue;
-		if (cache->locked) {
-			printk("NFS: cache locked for %s/%ld\n",
-				kdevname(dev), (long) ino);
-			continue;
+	decode_dirent_t decode = NFS_PROTO(inode)->decode_dirent;
+	struct page	*page = entry->page;
+	__u8		*p,
+			*start;
+	unsigned long	base = page->offset,
+			offset = entry->offset,
+			pg_offset,
+			fileid;
+	int		plus = NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS,
+			loop_count = 0,
+			count = 0,
+			res = 0;
+
+	dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ offset %ld\n", entry->offset);
+	pg_offset = offset & ~PAGE_CACHE_MASK;
+	start = (u8*)page_address(page);
+	p = start + pg_offset;
+
+	for(;;) {
+		/* Note: entry->prev contains the offset of the start of the
+		 *       current dirent */
+		fileid = nfs_fileid_to_ino_t(entry->ino);
+		res = filldir(dirent, entry->name, entry->len, entry->prev, fileid);
+		if (res < 0)
+			break;
+		filp->f_pos = entry->offset;
+		count++;
+		p = (u8*)decode((__u32*)p, entry, plus);
+		if (!p || IS_ERR(p))
+			break;
+		pg_offset = p - start;
+		entry->prev = entry->offset;
+		entry->offset = base + pg_offset;
+		if (loop_count++ > 200) {
+			loop_count = 0;
+			schedule();
 		}
-		cache->valid = 0;	/* brute force */
 	}
+	if (count > 0) {
+		res = count;
+		NFS_FILE_READTIME(filp) = NFS_CACHE_MTIME(inode);
+	}
+
+	dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ offset %ld; returning = %d\n", entry->offset, res);
+	return res;
 }
 
-/*
- * Invalidate the dircache for a super block (or all caches),
- * and release the cache memory.
+/* The file offset position is now represented as a true offset into the
+ * page cache as is the case in most of the other filesystems.
  */
-void
-nfs_invalidate_dircache_sb(struct super_block *sb)
+static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct nfs_dirent *cache = dircache;
-	int		i;
+	struct dentry	*dentry = filp->f_dentry;
+	struct inode	*inode = dentry->d_inode;
+	struct page	*page;
+	struct nfs_entry my_entry,
+			*entry = &my_entry;
+	unsigned long	offset;
+	long		res,
+			entries = 0;
+
+	res = nfs_revalidate_inode(dentry);
+	if (res < 0)
+		return res;
 
-	for (i = NFS_MAX_DIRCACHE; i--; cache++) {
-		if (sb && sb->s_dev != cache->dev)
-			continue;
-		if (cache->locked) {
-			printk("NFS: cache locked at umount %s\n",
-				(cache->entry ? "(lost a page!)" : ""));
-			continue;
-		}
-		cache->valid = 0;	/* brute force */
-		if (cache->entry) {
-			free_page((unsigned long) cache->entry);
-			cache->entry = NULL;
+	/*
+	 * filp->f_pos points to the file offset in the page cache,
+	 * but if the cache has meanwhile been zapped, we need to
+	 * read from the last dirent to revalidate f_pos
+	 * itself.
+	 */
+	memset(entry, 0, sizeof(*entry));
+
+
+	if (filp->f_pos >= ULONG_MAX)
+		offset = 0;
+	else
+		offset = filp->f_pos;
+
+	while(!entry->eof) {
+		res = search_cached_dirent_pages(inode, offset, entry);
+
+		if (res < 0) {
+			if (entry->eof)
+				break;
+			res = refetch_to_readdir(filp, inode, offset, entry);
+			if (res < 0)
+				break;
 		}
-	}
-}
 
-/*
- * Free directory cache memory
- * Called from cleanup_module
- */
-void
-nfs_free_dircache(void)
-{
-	dfprintk(DIRCACHE, "NFS: freeing dircache\n");
-	nfs_invalidate_dircache_sb(NULL);
+		page = entry->page;
+		if (!page)
+			printk(KERN_ERR "NFS: Missing page...\n");
+		res = nfs_do_filldir(filp, inode, entry, dirent, filldir);
+		page_cache_release(page);
+		entry->page = NULL;
+		if (res < 0)
+			break;
+		offset = entry->offset;
+		entries += res;
+	}
+	if (res < 0 && res != -EBADCOOKIE && !entries)
+		return res;
+	return entries;
 }
 
 /*
  * Whenever an NFS operation succeeds, we know that the dentry
  * is valid, so we update the revalidation timestamp.
  */
-static inline void nfs_renew_times(struct dentry * dentry)
+static inline void
+nfs_renew_times(struct dentry * dentry)
 {
-	dentry->d_time = jiffies;
+		dentry->d_time = jiffies;
 }
 
 static inline int nfs_dentry_force_reval(struct dentry *dentry, int flags)
@@ -384,7 +556,7 @@
 		if (diff < 15*60)
 			timeout = 0;
 	}
-	
+
 	return time_after(jiffies,dentry->d_time + timeout);
 }
 
@@ -398,8 +570,9 @@
 #define NFS_REVALIDATE_NEGATIVE (1 * HZ)
 static inline int nfs_neg_need_reval(struct dentry *dentry)
 {
-	unsigned long timeout = NFS_ATTRTIMEO(dentry->d_parent->d_inode);
-	long diff = CURRENT_TIME - dentry->d_parent->d_inode->i_mtime;
+	struct inode *dir = dentry->d_parent->d_inode;
+	unsigned long timeout = NFS_ATTRTIMEO(dir);
+	long diff = CURRENT_TIME - dir->i_mtime;
 
 	if (diff < 5*60 && timeout > NFS_REVALIDATE_NEGATIVE)
 		timeout = NFS_REVALIDATE_NEGATIVE;
@@ -421,11 +594,12 @@
  */
 static int nfs_lookup_revalidate(struct dentry * dentry, int flags)
 {
-	struct dentry * parent = dentry->d_parent;
-	struct inode * inode = dentry->d_inode;
+	struct dentry		*dir = dentry->d_parent;
+	struct inode		*inode = dentry->d_inode,
+				*dir_i = dir->d_inode;
+	struct nfs_fh		fhandle;
+	struct nfs_fattr	fattr, dir_attr;
 	int error;
-	struct nfs_fh fhandle;
-	struct nfs_fattr fattr;
 
 	/*
 	 * If we don't have an inode, let's look at the parent
@@ -440,39 +614,49 @@
 
 	if (is_bad_inode(inode)) {
 		dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n",
-			parent->d_name.name, dentry->d_name.name);
+			dir->d_name.name, dentry->d_name.name);
 		goto out_bad;
 	}
 
-	if (IS_ROOT(dentry))
-		goto out_valid;
-
 	if (!nfs_dentry_force_reval(dentry, flags))
 		goto out_valid;
 
+	if (IS_ROOT(dentry)) {
+		__nfs_revalidate_inode(dentry);
+		goto out_valid_renew;
+	}
+
 	/*
 	 * Do a new lookup and check the dentry attributes.
 	 */
-	error = nfs_proc_lookup(NFS_DSERVER(parent), NFS_FH(parent),
-				dentry->d_name.name, &fhandle, &fattr);
-	if (error)
+	error = NFS_CALL(lookup, dir_i, (dir, &dir_attr,
+				  &dentry->d_name, &fhandle, &fattr));
+	if (error < 0)
 		goto out_bad;
 
 	/* Inode number matches? */
-	if (fattr.fileid != inode->i_ino)
+	if (!(fattr.valid & NFS_ATTR_FATTR) ||
+	    NFS_FSID(inode) != fattr.fsid ||
+	    NFS_FILEID(inode) != fattr.fileid)
 		goto out_bad;
 
 	/* Filehandle matches? */
-	if (memcmp(dentry->d_fsdata, &fhandle, sizeof(struct nfs_fh)))
+	if (NFS_FH(dentry)->size == 0)
+		goto out_bad;
+
+	if (NFS_FH(dentry)->size != fhandle.size ||
+	    memcmp(NFS_FH(dentry)->data, fhandle.data, fhandle.size))
 		goto out_bad;
 
 	/* Ok, remeber that we successfully checked it.. */
-	nfs_renew_times(dentry);
 	nfs_refresh_inode(inode, &fattr);
+	nfs_refresh_inode(dir_i, &dir_attr);
 
-out_valid:
+ out_valid_renew:
+	nfs_renew_times(dentry);
+ out_valid:
 	return 1;
-out_bad:
+ out_bad:
 	if (!list_empty(&dentry->d_subdirs))
 		shrink_dcache_parent(dentry);
 	/* If we have submounts, don't unhash ! */
@@ -480,9 +664,9 @@
 		goto out_valid;
 	d_drop(dentry);
 	if (dentry->d_parent->d_inode)
-		nfs_invalidate_dircache(dentry->d_parent->d_inode);
+		nfs_zap_caches(dentry->d_parent->d_inode);
 	if (inode && S_ISDIR(inode->i_mode))
-		nfs_invalidate_dircache(inode);
+		nfs_zap_caches(inode);
 	return 0;
 }
 
@@ -497,18 +681,24 @@
 		dentry->d_flags);
 
 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
+		struct rpc_auth *auth = NULL;
+		struct rpc_cred *cred = nfs_dentry_cred(dentry);
 		int error;
 		
 		dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
+		NFS_DENTRY(dentry)->cred = NULL;
+		if (dentry->d_inode)
+			auth = NFS_CLIENT(dentry->d_inode)->cl_auth;
 		/* Unhash it first */
 		d_drop(dentry);
-		error = nfs_safe_remove(dentry);
+		error = _nfs_safe_remove(dentry, cred);
+		if (cred && auth)
+			rpcauth_releasecred(auth, cred);
 		if (error)
-			printk("NFS: can't silly-delete %s/%s, error=%d\n",
+			printk(KERN_INFO "NFS: can't silly-delete %s/%s, error=%d\n",
 				dentry->d_parent->d_name.name,
 				dentry->d_name.name, error);
 	}
-
 }
 
 /*
@@ -529,33 +719,45 @@
 	NULL			/* d_iput */
 };
 
-static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry)
+static struct dentry *nfs_lookup(struct inode *dir_i, struct dentry * dentry)
 {
+	struct dentry *dir = dentry->d_parent;
 	struct inode *inode;
 	int error;
 	struct nfs_fh fhandle;
-	struct nfs_fattr fattr;
+	struct nfs_fattr fattr, dir_attr;
 
 	dfprintk(VFS, "NFS: lookup(%s/%s)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 
 	error = -ENAMETOOLONG;
-	if (dentry->d_name.len > NFS_MAXNAMLEN)
+	if (dentry->d_name.len > NFS_SERVER(dir_i)->namelen)
 		goto out;
 
-	error = -ENOMEM;
+	dentry->d_op = &nfs_dentry_operations;
+
 	if (!dentry->d_fsdata) {
-		dentry->d_fsdata = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
-		if (!dentry->d_fsdata)
+		dentry->d_fsdata = kmalloc(sizeof(struct nfs_dentry), GFP_KERNEL);
+		if (!dentry->d_fsdata) {
+			error = -ENOMEM;
 			goto out;
+		}
+		memset(dentry->d_fsdata, 0, sizeof(struct nfs_dentry));
 	}
-	dentry->d_op = &nfs_dentry_operations;
 
-	error = nfs_proc_lookup(NFS_SERVER(dir), NFS_FH(dentry->d_parent), 
-				dentry->d_name.name, &fhandle, &fattr);
+#if NFS_FIXME
+	inode = nfs_dircache_lookup(dir_i, dentry);
+	if (inode)
+		goto no_entry;
+#endif
+
+	error = NFS_CALL(lookup, dir_i, (dir, &dir_attr,
+				 &dentry->d_name, &fhandle, &fattr));
+	nfs_refresh_inode(dir_i, &dir_attr);
 	inode = NULL;
 	if (error == -ENOENT)
 		goto no_entry;
+
 	if (!error) {
 		error = -EACCES;
 		inode = nfs_fhget(dentry, &fhandle, &fattr);
@@ -594,78 +796,99 @@
  * that the operation succeeded on the server, but an error in the
  * reply path made it appear to have failed.
  */
-static int nfs_create(struct inode *dir, struct dentry *dentry, int mode)
+static int nfs_create(struct inode *dir_i, struct dentry *dentry, int mode)
 {
-	int error;
-	struct nfs_sattr sattr;
-	struct nfs_fattr fattr;
-	struct nfs_fh fhandle;
+	struct dentry	*dir = dentry->d_parent;
+	struct iattr	 attr;
+	struct nfs_fattr fattr, dir_attr;
+	struct nfs_fh	 fhandle;
+	int		 error;
 
 	dfprintk(VFS, "NFS: create(%x/%ld, %s\n",
-		dir->i_dev, dir->i_ino, dentry->d_name.name);
+		dir_i->i_dev, dir_i->i_ino, dentry->d_name.name);
 
-	sattr.mode = mode;
-	sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
-	sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
+#ifdef NFSD_BROKEN_UID
+	/* We set uid/gid in the request because IBM's broken nfsd
+	 * uses the root uid/gid otherwise. Argh!
+	 * (Hopefully the server will override the gid when the directory
+	 * has the sticky bit set. Irix may have a problem here...)
+	 */
+	attr.ia_mode = mode;
+	attr.ia_valid = ATTR_MODE | ATTR_UID | ATTR_GID;
+	attr.ia_uid = current->fsuid;
+	attr.ia_gid = current->fsgid;
+#else
+	attr.ia_mode = mode;
+	attr.ia_valid = ATTR_MODE;
+#endif
 
 	/*
 	 * Invalidate the dir cache before the operation to avoid a race.
+	 * The 0 argument passed into the create function should one day
+	 * contain the O_EXCL flag if requested. This allows NFSv3 to
+	 * select the appropriate create strategy. Currently open_namei
+	 * does not pass the create flags.
 	 */
-	nfs_invalidate_dircache(dir);
-	error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
-			dentry->d_name.name, &sattr, &fhandle, &fattr);
-	if (!error)
+	error = NFS_CALL(create, dir_i, (dir, &dir_attr, &dentry->d_name,
+			&attr, 0, &fhandle, &fattr));
+	if (!error && fhandle.size != 0)
 		error = nfs_instantiate(dentry, &fhandle, &fattr);
-	if (error)
+	if (error || fhandle.size == 0)
 		d_drop(dentry);
+	nfs_refresh_inode(dir_i, &dir_attr);
+	nfs_zap_caches(dir_i);
 	return error;
 }
 
 /*
  * See comments for nfs_proc_create regarding failed operations.
  */
-static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev)
+static int nfs_mknod(struct inode *dir_i, struct dentry *dentry, int mode, int rdev)
 {
-	int error;
-	struct nfs_sattr sattr;
-	struct nfs_fattr fattr;
-	struct nfs_fh fhandle;
+	struct dentry	*dir = dentry->d_parent;
+	struct iattr	 attr;
+	struct nfs_fattr fattr, dir_attr;
+	struct nfs_fh	 fhandle;
+	int		 error;
 
 	dfprintk(VFS, "NFS: mknod(%x/%ld, %s\n",
-		dir->i_dev, dir->i_ino, dentry->d_name.name);
+		dir_i->i_dev, dir_i->i_ino, dentry->d_name.name);
+
+	attr.ia_valid = ATTR_MODE | ATTR_UID | ATTR_GID;
+	attr.ia_mode = mode;
+	attr.ia_uid = current->fsuid;
+	attr.ia_gid = current->fsgid;
 
-	sattr.mode = mode;
-	sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
-	if (S_ISCHR(mode) || S_ISBLK(mode))
-		sattr.size = rdev; /* get out your barf bag */
-	sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
-
-	nfs_invalidate_dircache(dir);
-	error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
-				dentry->d_name.name, &sattr, &fhandle, &fattr);
-	if (!error)
+
+	error = NFS_CALL(mknod, dir_i, (dir, &dir_attr, &dentry->d_name,
+				&attr, rdev, &fhandle, &fattr));
+	if (!error && fhandle.size != 0)
 		error = nfs_instantiate(dentry, &fhandle, &fattr);
-	if (error)
+	if (error || fhandle.size == 0)
 		d_drop(dentry);
+	nfs_refresh_inode(dir_i, &dir_attr);
+	nfs_zap_caches(dir_i);
 	return error;
 }
 
 /*
  * See comments for nfs_proc_create regarding failed operations.
  */
-static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int nfs_mkdir(struct inode *dir_i, struct dentry *dentry, int mode)
 {
-	int error;
-	struct nfs_sattr sattr;
-	struct nfs_fattr fattr;
-	struct nfs_fh fhandle;
-
-	dfprintk(VFS, "NFS: mkdir(%x/%ld, %s\n",
-		dir->i_dev, dir->i_ino, dentry->d_name.name);
-
-	sattr.mode = mode | S_IFDIR;
-	sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
-	sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
+	struct dentry   *dir = dentry->d_parent;
+	struct iattr	 attr;
+	struct nfs_fattr fattr, dir_attr;
+	struct nfs_fh	 fhandle;
+	int		 error;
+
+	dfprintk(VFS, "NFS: mkdir(%x/%ld, %s)\n",
+		dir_i->i_dev, dir_i->i_ino, dentry->d_name.name);
+
+	attr.ia_valid = ATTR_MODE | ATTR_UID | ATTR_GID;
+	attr.ia_mode = mode | S_IFDIR;
+	attr.ia_uid = current->fsuid;
+	attr.ia_gid = current->fsgid;
 
 	/*
 	 * Always drop the dentry, we can't always depend on
@@ -674,32 +897,39 @@
 	 * depending on potentially bogus information.
 	 */
 	d_drop(dentry);
-	nfs_invalidate_dircache(dir);
-	error = nfs_proc_mkdir(NFS_DSERVER(dentry), NFS_FH(dentry->d_parent),
-				dentry->d_name.name, &sattr, &fhandle, &fattr);
-	if (!error)
-		dir->i_nlink++;
+	error = NFS_CALL(mkdir, dir_i, (dir, &dir_attr,
+				&dentry->d_name, &attr, &fhandle, &fattr));
+	if (!error && fhandle.size != 0) {
+		dir_i->i_nlink ++;
+		error = nfs_instantiate(dentry, &fhandle, &fattr);
+	}
+	if (error || fhandle.size == 0)
+		d_drop(dentry);
+	nfs_refresh_inode(dir_i, &dir_attr);
+	nfs_zap_caches(dir_i);
 	return error;
 }
 
-static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
+static int nfs_rmdir(struct inode *dir_i, struct dentry *dentry)
 {
-	int error;
+	struct dentry	*dir = dentry->d_parent;
+	struct nfs_fattr dir_attr;
+	int		 error;
 
 	dfprintk(VFS, "NFS: rmdir(%x/%ld, %s\n",
-		dir->i_dev, dir->i_ino, dentry->d_name.name);
+		dir_i->i_dev, dir_i->i_ino, dentry->d_name.name);
 
-	nfs_invalidate_dircache(dir);
-	error = nfs_proc_rmdir(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
-				dentry->d_name.name);
+	error = NFS_CALL(rmdir, dir_i, (dir, &dir_attr, &dentry->d_name));
 
 	/* Update i_nlink and invalidate dentry. */
 	if (!error) {
 		d_drop(dentry);
-		if (dir->i_nlink)
-			dir->i_nlink--;
+		if (dir_i->i_nlink)
+			dir_i->i_nlink --;
 	}
 
+	nfs_refresh_inode(dir_i, &dir_attr);
+	nfs_zap_caches(dir_i);
 	return error;
 }
 
@@ -758,15 +988,18 @@
 	return sdentry;
 }
 
-static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
+static int nfs_sillyrename(struct inode *dir_i, struct dentry *dentry)
 {
+	struct dentry	*dir = dentry->d_parent;
 	static unsigned int sillycounter = 0;
-	const int      i_inosize  = sizeof(dir->i_ino)*2;
-	const int      countersize = sizeof(sillycounter)*2;
-	const int      slen       = strlen(".nfs") + i_inosize + countersize;
-	char           silly[slen+1];
-	struct dentry *sdentry;
-	int            error = -EIO;
+	struct nfs_fattr dir_attr;
+	const int        i_inosize  = sizeof(dir_i->i_ino)*2;
+	const int        countersize = sizeof(sillycounter)*2;
+	const int        slen       = strlen(".nfs") + i_inosize + countersize;
+	struct qstr      qsilly;
+	char             silly[slen+1];
+	struct dentry *  sdentry;
+	int              error = -EIO;
 
 	dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name, 
@@ -782,9 +1015,9 @@
 	}
 
 #ifdef NFS_PARANOIA
-if (!dentry->d_inode)
-printk("NFS: silly-renaming %s/%s, negative dentry??\n",
-dentry->d_parent->d_name.name, dentry->d_name.name);
+	if (!dentry->d_inode)
+		printk(KERN_ERR "NFS: silly-renaming %s/%s, negative dentry??\n",
+		       dentry->d_parent->d_name.name, dentry->d_name.name);
 #endif
 	/*
 	 * We don't allow a dentry to be silly-renamed twice.
@@ -816,16 +1049,19 @@
 			goto out;
 	} while(sdentry->d_inode != NULL); /* need negative lookup */
 
-	nfs_invalidate_dircache(dir);
-	error = nfs_proc_rename(NFS_SERVER(dir),
-				NFS_FH(dentry->d_parent), dentry->d_name.name,
-				NFS_FH(dentry->d_parent), silly);
+	qsilly.name = silly;
+	qsilly.len  = strlen(silly);
+	error = NFS_CALL(rename, dir_i, (dir, &dir_attr, &dentry->d_name,
+				  dir, &dir_attr, &qsilly));
 	if (!error) {
 		nfs_renew_times(dentry);
 		d_move(dentry, sdentry);
 		dentry->d_flags |= DCACHE_NFSFS_RENAMED;
+		NFS_DENTRY(dentry)->cred = rpcauth_lookupcred(NFS_CLIENT(dentry->d_inode)->cl_auth, 0);
  		/* If we return 0 we don't unlink */
 	}
+	nfs_refresh_inode(dir_i, &dir_attr);
+	nfs_zap_caches(dir_i);
 	dput(sdentry);
 out:
 	return error;
@@ -838,11 +1074,13 @@
  * We update inode->i_nlink and free the inode prior to the operation
  * to avoid possible races if the server reuses the inode.
  */
-static int nfs_safe_remove(struct dentry *dentry)
+static int _nfs_safe_remove(struct dentry *dentry, struct rpc_cred *cred)
 {
-	struct inode *dir = dentry->d_parent->d_inode;
-	struct inode *inode = dentry->d_inode;
-	int error, rehash = 0;
+	struct nfs_fattr dir_attr;
+	struct dentry	*dir = dentry->d_parent;
+	struct inode	*dir_i = dir->d_inode,   
+			*inode = dentry->d_inode;
+	int		 error, rehash = 0;
 		
 	dfprintk(VFS, "NFS: safe_remove(%s/%s, %ld)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -852,18 +1090,19 @@
 	error = -EBUSY;
 	if (!inode) {
 #ifdef NFS_PARANOIA
-printk("nfs_safe_remove: %s/%s already negative??\n",
-dentry->d_parent->d_name.name, dentry->d_name.name);
+		printk(KERN_ERR "nfs_safe_remove: %s/%s already negative??\n",
+		       dentry->d_parent->d_name.name, dentry->d_name.name);
 #endif
 	}
 
 	if (dentry->d_count > 1) {
 #ifdef NFS_PARANOIA
-printk("nfs_safe_remove: %s/%s busy, d_count=%d\n",
-dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_count);
+		printk(KERN_INFO "nfs_safe_remove: %s/%s busy, d_count=%d\n",
+		       dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_count);
 #endif
 		goto out;
 	}
+
 	/*
 	 * Unhash the dentry while we remove the file ...
 	 */
@@ -879,9 +1118,10 @@
 			inode->i_nlink --;
 		d_delete(dentry);
 	}
-	nfs_invalidate_dircache(dir);
-	error = nfs_proc_remove(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
-				dentry->d_name.name);
+	error = NFS_CALL(remove, dir_i, (dir, &dir_attr, &dentry->d_name, cred));
+	nfs_refresh_inode(dir_i, &dir_attr);
+	nfs_zap_caches(dir_i);
+
 	/*
 	 * Rehash the negative dentry if the operation succeeded.
 	 */
@@ -891,6 +1131,12 @@
 	return error;
 }
 
+static int nfs_safe_remove(struct dentry *dentry)	/* wrapper around _nfs_safe_remove() */
+{
+	return _nfs_safe_remove(dentry, NULL);	/* NULL is passed as the rpc_cred argument */
+}
+
+
 /*  We do silly rename. In case sillyrename() returns -EBUSY, the inode
  *  belongs to an active ".nfs..." file and we return -EBUSY.
  *
@@ -914,30 +1160,38 @@
 }
 
 static int
-nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+nfs_symlink(struct inode *dir_i, struct dentry *dentry, const char *symname)
 {
-	struct nfs_sattr sattr;
-	int error;
+	struct dentry	*dir = dentry->d_parent;
+	struct nfs_fattr dir_attr, sym_attr;
+	struct nfs_fh    sym_fh;
+	struct iattr     attr;
+	struct qstr      qsymname;
+	int              error, mode, maxlen;
 
 	dfprintk(VFS, "NFS: symlink(%x/%ld, %s, %s)\n",
-		dir->i_dev, dir->i_ino, dentry->d_name.name, symname);
+		dir_i->i_dev, dir_i->i_ino, dentry->d_name.name, symname);
 
 	error = -ENAMETOOLONG;
-	if (strlen(symname) > NFS_MAXPATHLEN)
+	maxlen = (NFS_PROTO(dir_i)->version==2) ? NFS2_MAXPATHLEN : NFS3_MAXPATHLEN;
+	if (strlen(symname) > maxlen)
 		goto out;
 
 #ifdef NFS_PARANOIA
-if (dentry->d_inode)
-printk("nfs_proc_symlink: %s/%s not negative!\n",
-dentry->d_parent->d_name.name, dentry->d_name.name);
+	if (dentry->d_inode)
+		printk(KERN_WARNING "nfs_proc_symlink: %s/%s not negative!\n",
+		       dentry->d_parent->d_name.name, dentry->d_name.name);
 #endif
 	/*
 	 * Fill in the sattr for the call.
+	 *
  	 * Note: SunOS 4.1.2 crashes if the mode isn't initialized!
 	 */
-	sattr.mode = S_IFLNK | S_IRWXUGO;
-	sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
-	sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
+	attr.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID;
+	attr.ia_mode = mode = S_IFLNK | S_IRWXUGO;
+	attr.ia_uid = current->fsuid;
+	attr.ia_gid = current->fsgid;
+
 
 	/*
 	 * Drop the dentry in advance to force a new lookup.
@@ -945,14 +1199,22 @@
 	 * can't instantiate the new inode.
 	 */
 	d_drop(dentry);
-	nfs_invalidate_dircache(dir);
-	error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
-				dentry->d_name.name, symname, &sattr);
-	if (!error) {
-		nfs_renew_times(dentry->d_parent);
-	} else if (error == -EEXIST) {
-		printk("nfs_proc_symlink: %s/%s already exists??\n",
-			dentry->d_parent->d_name.name, dentry->d_name.name);
+	qsymname.name = symname;
+	qsymname.len  = strlen(symname);
+
+	error = NFS_CALL(symlink, dir_i, (dir, &dir_attr,
+				&dentry->d_name, &qsymname, &attr,
+				&sym_fh, &sym_attr));
+	nfs_refresh_inode(dir_i, &dir_attr);
+	nfs_zap_caches(dir_i);
+	if (!error && sym_fh.size != 0 && (sym_attr.valid & NFS_ATTR_FATTR)) {
+		error = nfs_instantiate(dentry, &sym_fh, &sym_attr);
+	} else {
+		if (error == -EEXIST)
+			printk(KERN_INFO "nfs_proc_symlink: %s/%s already exists??\n",
+			       dentry->d_parent->d_name.name,
+			       dentry->d_name.name);
+		d_drop(dentry);
 	}
 
 out:
@@ -960,10 +1222,12 @@
 }
 
 static int 
-nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
+nfs_link(struct dentry *old_dentry, struct inode *dir_i, struct dentry *dentry)
 {
-	struct inode *inode = old_dentry->d_inode;
-	int error;
+	struct dentry	*dir = dentry->d_parent;
+	struct inode	*inode = old_dentry->d_inode;
+	struct nfs_fattr old_attr, dir_attr;
+	int		 error;
 
 	dfprintk(VFS, "NFS: link(%s/%s -> %s/%s)\n",
 		old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
@@ -975,16 +1239,18 @@
 	 * we can't use the existing dentry.
 	 */
 	d_drop(dentry);
-	nfs_invalidate_dircache(dir);
-	error = nfs_proc_link(NFS_DSERVER(old_dentry), NFS_FH(old_dentry),
-				NFS_FH(dentry->d_parent), dentry->d_name.name);
+	error = NFS_CALL(link, inode, (old_dentry, &old_attr,
+				       dir, &dir_attr, &dentry->d_name));
 	if (!error) {
- 		/*
+		/*
 		 * Update the link count immediately, as some apps
 		 * (e.g. pine) test this after making a link.
 		 */
 		inode->i_nlink++;
 	}
+	nfs_refresh_inode(inode, &old_attr);
+	nfs_refresh_inode(dir_i, &dir_attr);
+	nfs_zap_caches(dir_i);
 	return error;
 }
 
@@ -1011,14 +1277,19 @@
  * no pending writes (if it's a file), and the use count must be 1.
  * If these conditions are met, we can drop the dentries before doing
  * the rename.
+ *
+ * FIXME: Sun seems to take this even one step further. The connectathon
+ * test suite has a file that renames open file A to open file B,
+ * and expects a silly rename to happen for B.
  */
 static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		      struct inode *new_dir, struct dentry *new_dentry)
 {
-	struct inode *old_inode = old_dentry->d_inode;
-	struct inode *new_inode = new_dentry->d_inode;
-	struct dentry *dentry = NULL;
-	int error, rehash = 0;
+	struct nfs_fattr old_attr, new_attr;
+	struct inode *   old_inode = old_dentry->d_inode;
+	struct inode *   new_inode = new_dentry->d_inode;
+	struct dentry *  dentry = NULL;
+	int              error, rehash = 0;
 
 	dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
 		old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
@@ -1056,8 +1327,9 @@
 		/* dentry still busy? */
 		if (new_dentry->d_count > 1) {
 #ifdef NFS_PARANOIA
-printk("nfs_rename: target %s/%s busy, d_count=%d\n",
-new_dentry->d_parent->d_name.name,new_dentry->d_name.name,new_dentry->d_count);
+		printk(KERN_INFO "nfs_rename: target %s/%s busy, d_count=%d\n",
+			new_dentry->d_parent->d_name.name,
+			new_dentry->d_name.name,new_dentry->d_count);
 #endif
 			goto out;
 		}
@@ -1073,8 +1345,8 @@
 
 	if (new_dentry->d_count > 1 && new_inode) {
 #ifdef NFS_PARANOIA
-printk("nfs_rename: new dentry %s/%s busy, d_count=%d\n",
-new_dentry->d_parent->d_name.name,new_dentry->d_name.name,new_dentry->d_count);
+		printk(KERN_INFO "nfs_rename: new dentry %s/%s busy, d_count=%d\n",
+			new_dentry->d_parent->d_name.name,new_dentry->d_name.name,new_dentry->d_count);
 #endif
 		goto out;
 	}
@@ -1090,11 +1362,13 @@
 	if (new_inode)
 		d_delete(new_dentry);
 
-	nfs_invalidate_dircache(new_dir);
-	nfs_invalidate_dircache(old_dir);
-	error = nfs_proc_rename(NFS_DSERVER(old_dentry),
-			NFS_FH(old_dentry->d_parent), old_dentry->d_name.name,
-			NFS_FH(new_dentry->d_parent), new_dentry->d_name.name);
+	error = NFS_CALL(rename, old_dir,
+			 (old_dentry->d_parent, &old_attr, &old_dentry->d_name,
+			  new_dentry->d_parent, &new_attr, &new_dentry->d_name));
+	nfs_refresh_inode(old_dir, &old_attr);
+	nfs_zap_caches(old_dir);
+	nfs_refresh_inode(new_dir, &new_attr);
+	nfs_zap_caches(new_dir);
 
 	/* Update the dcache if needed */
 	if (rehash)
--- linux/fs/nfs/file.c.nfsattack-gafton	Tue Oct 26 20:53:42 1999
+++ linux/fs/nfs/file.c	Fri Feb  4 23:26:34 2000
@@ -1,6 +1,8 @@
 /*
  *  linux/fs/nfs/file.c
  *
+ *  NFS regular file handling.
+ *
  *  Copyright (C) 1992  Rick Sladkey
  *
  *  Changes Copyright (C) 1994 by Florian La Roche
@@ -12,8 +14,6 @@
  *  Expire cache on write to a file by Wai S Kok (Oct 1994).
  *
  *  Total rewrite of read side for new NFS buffer cache.. Linus.
- *
- *  nfs regular file handling functions
  */
 
 #include <linux/sched.h>
@@ -21,17 +21,24 @@
 #include <linux/errno.h>
 #include <linux/fcntl.h>
 #include <linux/stat.h>
+#include <linux/nfs.h>
+#include <linux/nfs3.h>
 #include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
 #include <linux/mm.h>
 #include <linux/malloc.h>
 #include <linux/pagemap.h>
 #include <linux/lockd/bind.h>
+#include <linux/sunrpc/auth.h>
+#include <linux/sunrpc/clnt.h>
 
 #include <asm/segment.h>
 #include <asm/system.h>
 
 #define NFSDBG_FACILITY		NFSDBG_FILE
 
+static int  nfs_file_open(struct inode *, struct file *);
+static int  nfs_file_release(struct inode *, struct file *);
 static int  nfs_file_mmap(struct file *, struct vm_area_struct *);
 static ssize_t nfs_file_read(struct file *, char *, size_t, loff_t *);
 static ssize_t nfs_file_write(struct file *, const char *, size_t, loff_t *);
@@ -46,9 +53,9 @@
 	NULL,			/* select - default */
 	NULL,			/* ioctl - default */
 	nfs_file_mmap,		/* mmap */
-	nfs_open,		/* open */
+	nfs_file_open,		/* open */
 	nfs_file_flush,		/* flush */
-	nfs_release,		/* release */
+	nfs_file_release,	/* release */
 	nfs_fsync,		/* fsync */
 	NULL,			/* fasync */
 	NULL,			/* check_media_change */
@@ -89,21 +96,87 @@
  *
  */
 static int
-nfs_file_flush(struct file *file)
+nfs_file_flush(struct file *filp)
 {
-	struct inode	*inode = file->f_dentry->d_inode;
-	int		status;
+	struct dentry	*dentry = filp->f_dentry;
+	struct inode	*inode = dentry->d_inode;
 
 	dfprintk(VFS, "nfs: flush(%x/%ld)\n", inode->i_dev, inode->i_ino);
+	return nfs_fsync(filp, dentry);
+}
 
-	status = nfs_wb_file(inode, file);
-	if (!status) {
-		status = file->f_error;
-		file->f_error = 0;
-	}
+struct nfs_file *nfs_file_alloc(void)
+{
+	/* Hand back a zero-filled nfs_file, or NULL if kmalloc() fails. */
+	struct nfs_file *nf = kmalloc(sizeof(*nf), GFP_KERNEL);
+	if (nf != NULL)
+		memset(nf, 0, sizeof(*nf));
+	return nf;
+}
+
+void nfs_file_free(struct nfs_file *p)	/* release an nfs_file obtained from nfs_file_alloc() */
+{
+	kfree(p);	/* nfs_file_alloc() uses kmalloc(), so kfree() is the matching call */
+}
+
+
+/*
+ * Open the file.
+ * This is a no-op for NFSv2, but on NFSv3 we will check the user's
+ * access permission. This is more consistent with the Unix `check on
+ * open' philosophy.
+ */
+static int
+nfs_file_open(struct inode *inode, struct file *filp)
+{
+	struct dentry		*dentry = filp->f_dentry;
+	struct rpc_auth		*auth = NFS_CLIENT(inode)->cl_auth;
+	struct nfs_file		*data;
+	struct nfs_fattr	fattr;
+	int			status = 0, how = 0;
+
+	/* Ask the server (ACCESS) only if the protocol has the call, this
+	 * is not a create, and the process is not setuid/setgid (real and
+	 * fs ids agree). */
+	if (NFS_PROTO(inode)->access != 0 && !(filp->f_flags & O_CREAT) &&
+	    current->uid == current->fsuid && current->gid == current->fsgid) {
+		if (filp->f_mode & FMODE_READ)
+			how |= MAY_READ;
+		if (filp->f_mode & FMODE_WRITE)
+			how |= MAY_WRITE;
+		status = NFS_CALL(access, inode, (dentry, how, &fattr));
+		if (status)
+			goto out;
+		nfs_refresh_inode(inode, &fattr);
+	}
+
+	/* Attach a fresh nfs_file carrying the looked-up RPC credential. */
+	data = nfs_file_alloc();
+	if (data == NULL) {
+		status = -ENOMEM;
+		goto out;
+	}
+	data->cred = rpcauth_lookupcred(auth, 0);
+	filp->private_data = data;
+ out:
 	return status;
 }
 
+
+static int
+nfs_file_release(struct inode *inode, struct file *filp)
+{
+	struct nfs_file		*file = NFS_FILE(filp);
+	struct rpc_auth		*auth = NFS_CLIENT(inode)->cl_auth;
+	struct rpc_cred		*cred = nfs_file_cred(filp);
+
+	/* Release the credential, if one is attached, then the nfs_file. */
+	if (cred != NULL)
+		rpcauth_releasecred(auth, cred);
+	nfs_file_free(file);
+	return 0;
+}
+
 static ssize_t
 nfs_file_read(struct file * file, char * buf, size_t count, loff_t *ppos)
 {
@@ -114,7 +187,7 @@
 		dentry->d_parent->d_name.name, dentry->d_name.name,
 		(unsigned long) count, (unsigned long) *ppos);
 
-	result = nfs_revalidate_inode(NFS_DSERVER(dentry), dentry);
+	result = nfs_revalidate_inode(dentry);
 	if (!result)
 		result = generic_file_read(file, buf, count, ppos);
 	return result;
@@ -129,7 +202,7 @@
 	dfprintk(VFS, "nfs: mmap(%s/%s)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 
-	status = nfs_revalidate_inode(NFS_DSERVER(dentry), dentry);
+	status = nfs_revalidate_inode(dentry);
 	if (!status)
 		status = generic_file_mmap(file, vma);
 	return status;
@@ -174,7 +247,7 @@
 	result = -EBUSY;
 	if (IS_SWAPFILE(inode))
 		goto out_swapfile;
-	result = nfs_revalidate_inode(NFS_DSERVER(dentry), dentry);
+	result = nfs_revalidate_inode(dentry);
 	if (result)
 		goto out;
 
@@ -197,11 +270,13 @@
 int
 nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
 {
-	struct inode * inode = filp->f_dentry->d_inode;
+	struct dentry * dentry = filp->f_dentry;
+	struct inode * inode = dentry->d_inode;
 	int	status = 0;
 
-	dprintk("NFS: nfs_lock(f=%4x/%ld, t=%x, fl=%x, r=%ld:%ld)\n",
+	dprintk("NFS: nfs_lock(f=%4x/%ld, c=%x, t=%x, fl=%x, r=%ld:%ld)\n",
 			inode->i_dev, inode->i_ino,
+			cmd,
 			fl->fl_type, fl->fl_flags,
 			fl->fl_start, fl->fl_end);
 
@@ -235,17 +310,17 @@
 	 */
 	status = nfs_wb_all(inode);
 	if (status < 0)
-		return status;
+		goto out_unlock;
 
-	if ((status = nlmclnt_proc(inode, cmd, fl)) < 0)
-		return status;
-	else
+	status = nlmclnt_proc(inode, cmd, fl);
+	if (status >= 0)
 		status = 0;
 
 	/*
 	 * Make sure we re-validate anything we've got cached.
 	 * This makes locking act as a cache coherency point.
 	 */
+ out_unlock:
  out_ok:
 	NFS_CACHEINV(inode);
 	return status;
--- linux/fs/nfs/inode.c.nfsattack-gafton	Fri Feb  4 23:26:28 2000
+++ linux/fs/nfs/inode.c	Fri Feb  4 23:26:34 2000
@@ -24,11 +24,18 @@
 #include <linux/errno.h>
 #include <linux/locks.h>
 #include <linux/unistd.h>
+#include <linux/major.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/stats.h>
+#include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs3.h>
 #include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
 #include <linux/lockd/bind.h>
 
+#include <asm/spinlock.h>
+
 #include <asm/system.h>
 #include <asm/uaccess.h>
 
@@ -36,17 +43,19 @@
 #define NFSDBG_FACILITY		NFSDBG_VFS
 #define NFS_PARANOIA 1
 
-static struct inode * __nfs_fhget(struct super_block *, struct nfs_fattr *);
-static void nfs_zap_caches(struct inode *);
-static void nfs_invalidate_inode(struct inode *);
+extern int nfs_reqlist_alloc(struct nfs_server *);
+extern void nfs_reqlist_free(struct nfs_server *);
+
+static struct inode * __nfs_fhget(struct super_block *, struct nfs_fh *, struct nfs_fattr *);
 
 static void nfs_read_inode(struct inode *);
 static void nfs_put_inode(struct inode *);
 static void nfs_delete_inode(struct inode *);
 static int  nfs_notify_change(struct dentry *, struct iattr *);
 static void nfs_put_super(struct super_block *);
-static void nfs_umount_begin(struct super_block *);
 static int  nfs_statfs(struct super_block *, struct statfs *, int);
+static void nfs_clear_inode(struct inode *);
+static void nfs_umount_begin(struct super_block *);
 
 static struct super_operations nfs_sops = { 
 	nfs_read_inode,		/* read inode */
@@ -57,12 +66,63 @@
 	nfs_put_super,		/* put superblock */
 	NULL,			/* write superblock */
 	nfs_statfs,		/* stat filesystem */
-	NULL,			/* no remount */
-	NULL,			/* no clear inode */
+	NULL,			/* remount */
+	nfs_clear_inode,	/* clear inode */
 	nfs_umount_begin	/* umount attempt begin */
 };
 
-struct rpc_stat			nfs_rpcstat = { &nfs_program };
+
+/*
+ * RPC cruft for NFS
+ */
+static struct rpc_stat		nfs_rpcstat = { &nfs_program };
+static struct rpc_version *	nfs_version[] = {
+	NULL,			/* [0] no entry */
+	NULL,			/* [1] no entry */
+	&nfs_version2,		/* [2] */
+#ifdef CONFIG_NFS_V3
+	&nfs_version3,		/* [3] present only when CONFIG_NFS_V3 is defined */
+#endif
+};	/* presumably indexed by protocol version number -- TODO confirm against sunrpc */
+
+struct rpc_program		nfs_program = {
+	"nfs",
+	NFS_PROGRAM,
+	sizeof(nfs_version) / sizeof(nfs_version[0]),	/* element count of nfs_version[] */
+	nfs_version,
+	&nfs_rpcstat,		/* nfs_rpcstat in turn points back at nfs_program */
+};
+
+
+
+spinlock_t nfs_inode_lock = SPIN_LOCK_UNLOCKED;
+
+static inline void nfs_unalias_inode(struct inode *inode)	/* unlink inode from its i_alias64 list */
+{
+	spin_lock(&nfs_inode_lock);	/* held across the unlink and re-init below */
+	list_del(&inode->u.nfs_i.i_alias64);
+	INIT_LIST_HEAD(&inode->u.nfs_i.i_alias64);	/* re-initialise the node as an empty list head */
+	spin_unlock(&nfs_inode_lock);
+}
+
+static inline void nfs_unhash_inode(struct inode *inode)
+{
+	remove_inode_hash(inode);	/* first drop it from the inode hash ... */
+	nfs_unalias_inode(inode);	/* ... then from the NFS i_alias64 list */
+}
+
+static inline unsigned long
+nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
+{
+	return nfs_fileid_to_ino_t(fattr->fileid);	/* inode number is derived from the fattr's fileid */
+}
+
+
+/*
+ * We don't keep the file handle in the inode anymore to avoid bloating
+ * struct inode and use a pointer to external memory instead.
+ */
+#define NFS_SB_FHSIZE(sb)	((sb)->u.nfs_sb.s_fhsize)
 
 /*
  * The "read_inode" function doesn't actually do anything:
@@ -78,8 +138,11 @@
 	inode->i_mode = 0;
 	inode->i_rdev = 0;
 	inode->i_op = NULL;
+	NFS_FILEID(inode) = 0;
+	NFS_FSID(inode) = 0;
 	NFS_CACHEINV(inode);
 	NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
+	NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
 }
 
 static void
@@ -89,60 +152,66 @@
 	/*
 	 * We want to get rid of unused inodes ...
 	 */
-	if (inode->i_count == 1)
+	if (inode->i_count == 1) {
 		inode->i_nlink = 0;
+		remove_inode_hash(inode);
+	}
 }
 
 static void
 nfs_delete_inode(struct inode * inode)
 {
-	int failed;
-
 	dprintk("NFS: delete_inode(%x/%ld)\n", inode->i_dev, inode->i_ino);
 	/*
 	 * Flush out any pending write requests ...
 	 */
-	if (NFS_WRITEBACK(inode) != NULL) {
+	if (NFS_CLUSTERS(inode) != NULL) {
 		unsigned long timeout = jiffies + 5*HZ;
 #ifdef NFS_DEBUG_VERBOSE
-printk("nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino);
+		printk(KERN_WARNING "nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino);
 #endif
-		nfs_inval(inode);
-		while (NFS_WRITEBACK(inode) != NULL &&
+		while (NFS_CLUSTERS(inode) != NULL &&
 		       time_before(jiffies, timeout)) {
 			current->state = TASK_INTERRUPTIBLE;
 			schedule_timeout(HZ/10);
 		}
 		current->state = TASK_RUNNING;
-		if (NFS_WRITEBACK(inode) != NULL)
-			printk("NFS: Arghhh, stuck RPC requests!\n");
+		if (NFS_CLUSTERS(inode) != NULL)
+			printk(KERN_WARNING "NFS: Arghhh, stuck RPC requests!\n");
 	}
-
-	failed = nfs_check_failed_request(inode);
-	if (failed)
-		printk("NFS: inode %ld had %d failed requests\n",
-			inode->i_ino, failed);
 	clear_inode(inode);
 }
 
+static void
+nfs_clear_inode(struct inode *inode)	/* installed as the "clear inode" entry of nfs_sops */
+{
+	dprintk("NFS: clear_inode(%x/%ld)\n", inode->i_dev, inode->i_ino);
+	nfs_unalias_inode(inode);	/* take the inode off its i_alias64 list */
+}
+
 void
 nfs_put_super(struct super_block *sb)
 {
 	struct nfs_server *server = &sb->u.nfs_sb.s_server;
 	struct rpc_clnt	*rpc;
 
+	/*
+	 * First get rid of the request flushing daemon.
+	 * Relies on rpc_shutdown_client() waiting on all
+	 * client tasks to finish.
+	 */
+	nfs_reqlist_exit(server);
+
 	if ((rpc = server->client) != NULL)
 		rpc_shutdown_client(rpc);
 
+	nfs_reqlist_free(server);
+
 #if 0
 	if (!(server->flags & NFS_MOUNT_NONLM))
 		lockd_down();	/* release rpc.lockd */
 #endif
 	rpciod_down();		/* release rpciod */
-	/*
-	 * Invalidate the dircache for this superblock.
-	 */
-	nfs_invalidate_dircache_sb(sb);
 
 	kfree(server->hostname);
 
@@ -160,17 +229,10 @@
 		rpc_killall_tasks(rpc);
 }
 
-/*
- * Compute and set NFS server blocksize
- */
-static unsigned int
-nfs_block_size(unsigned int bsize, unsigned char *nrbitsp)
-{
-	if (bsize < 1024)
-		bsize = NFS_DEF_FILE_IO_BUFFER_SIZE;
-	else if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE)
-		bsize = NFS_MAX_FILE_IO_BUFFER_SIZE;
 
+static inline unsigned long
+nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp)
+{
 	/* make sure blocksize is a power of two */
 	if ((bsize & (bsize - 1)) || nrbitsp) {
 		unsigned int	nrbits;
@@ -180,14 +242,56 @@
 		bsize = 1 << nrbits;
 		if (nrbitsp)
 			*nrbitsp = nrbits;
-		if (bsize < NFS_DEF_FILE_IO_BUFFER_SIZE)
-			bsize = NFS_DEF_FILE_IO_BUFFER_SIZE;
 	}
 
 	return bsize;
 }
 
 /*
+ * Calculate the number of 512-byte blocks used.
+ */
+static inline unsigned long
+nfs_calc_block_size(u64 tsize)
+{
+	off_t nbytes = nfs_size_to_off_t(tsize);	/* 64-bit size converted to off_t */
+	return (nbytes + 511) / 512;			/* round up to whole 512-byte units */
+}
+
+/*
+ * Compute and set NFS server blocksize
+ */
+static inline unsigned long
+nfs_block_size(unsigned long bsize, unsigned char *nrbitsp)
+{
+	/* Below 1024 fall back to the default; otherwise cap at the maximum. */
+	if (bsize < 1024)
+		return nfs_block_bits(NFS_DEF_FILE_IO_BUFFER_SIZE, nrbitsp);
+	if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE)
+		bsize = NFS_MAX_FILE_IO_BUFFER_SIZE;
+	return nfs_block_bits(bsize, nrbitsp);
+}
+
+/*
+ * Obtain the root inode of the file system.
+ */
+static struct inode *
+nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh)
+{
+	struct nfs_server	*server = &sb->u.nfs_sb.s_server;
+	struct nfs_fattr	fattr;
+	int			error;
+
+	/* The protocol's getroot op fills in fattr for the root handle. */
+	error = server->rpc_ops->getroot(server, rootfh, &fattr);
+	if (error >= 0)
+		return __nfs_fhget(sb, rootfh, &fattr);
+
+	/* Failure: log the (positive) error number and give up. */
+	printk(KERN_NOTICE "nfs_get_root: getattr error = %d\n", -error);
+	return NULL;
+}
+
+/*
  * The way this works is that the mount process passes a structure
  * in the data argument which contains the server's IP address
  * and the root file handle obtained from the server's mount
@@ -198,38 +302,52 @@
 {
 	struct nfs_mount_data	*data = (struct nfs_mount_data *) raw_data;
 	struct nfs_server	*server;
-	struct rpc_xprt		*xprt;
-	struct rpc_clnt		*clnt;
-	struct nfs_fh		*root_fh;
+	struct rpc_xprt         *xprt = 0;
+	struct rpc_clnt         *clnt = 0;
+	struct nfs_dentry       *root_d_fsdata = NULL;
+	struct nfs_fh           *root = &data->root, *root_fh, fh;
 	struct inode		*root_inode;
 	unsigned int		authflavor;
-	int			tcp;
 	struct sockaddr_in	srvaddr;
 	struct rpc_timeout	timeparms;
-	struct nfs_fattr	fattr;
+	struct nfs_fsinfo       fsinfo;
+	int                     tcp, version, maxlen;
 
 	MOD_INC_USE_COUNT;
-	if (!data)
-		goto out_miss_args;
+	sb->u.nfs_sb.s_root = NULL;
+	if (!data) {
+		printk(KERN_NOTICE "nfs_read_super: missing data argument\n");
+		goto failure;
+	}
 
 	/* No NFS V3. */
 	if (data->flags & NFS_MOUNT_VER3)
-		goto out_fail;
+		goto failure;
 
 	/* Don't complain if "mount" is newer. */
+	memset(&fh, 0, sizeof(fh));
 	if (data->version < NFS_MOUNT_VERSION) {
-		printk("nfs warning: mount version %s than kernel\n",
+		printk(KERN_WARNING "nfs warning: mount version %s than kernel\n",
 			data->version < NFS_MOUNT_VERSION ? "older" : "newer");
 		if (data->version < 2)
 			data->namlen = 0;
 		if (data->version < 3)
 			data->bsize  = 0;
+		if (data->version < 4) {
+			data->flags &= ~NFS_MOUNT_VER3;
+			root = &fh;
+			root->size = NFS2_FHSIZE;
+			memcpy(root->data, data->old_root.data, NFS2_FHSIZE);
+		}
+
 	}
 
 	/* We now require that the mount process passes the remote address */
 	memcpy(&srvaddr, &data->addr, sizeof(srvaddr));
-	if (srvaddr.sin_addr.s_addr == INADDR_ANY)
-		goto out_no_remote;
+	if (srvaddr.sin_addr.s_addr == INADDR_ANY) {
+		printk(KERN_WARNING "NFS: mount program didn't pass remote address!\n");
+		goto failure;
+	}
 
 	lock_super(sb);
 
@@ -237,12 +355,16 @@
 
 	sb->s_magic      = NFS_SUPER_MAGIC;
 	sb->s_op         = &nfs_sops;
-	sb->s_blocksize  = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
-	sb->u.nfs_sb.s_root = data->root;
+
+	sb->s_blocksize_bits = 0;
+	sb->s_blocksize = nfs_block_bits(data->bsize, &sb->s_blocksize_bits);
+
 	server           = &sb->u.nfs_sb.s_server;
+	memset(server, 0, sizeof(*server));
+
 	server->rsize    = nfs_block_size(data->rsize, NULL);
 	server->wsize    = nfs_block_size(data->wsize, NULL);
-	server->flags    = data->flags;
+	server->flags    = data->flags & NFS_MOUNT_FLAGMASK;
 
 	if (data->flags & NFS_MOUNT_NOAC) {
 		data->acregmin = data->acregmax = 0;
@@ -253,11 +375,33 @@
 	server->acdirmin = data->acdirmin*HZ;
 	server->acdirmax = data->acdirmax*HZ;
 
+	server->namelen  = data->namlen;
 	server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL);
 	if (!server->hostname)
-		goto out_unlock;
+		goto failure_unlock;
 	strcpy(server->hostname, data->hostname);
 
+	/* Check NFS protocol revision and initialize RPC op vector
+	 * and file handle pool. */
+	if (data->flags & NFS_MOUNT_VER3) {
+#ifdef CONFIG_NFS_V3
+		server->rpc_ops = &nfs_v3_clientops;
+		NFS_SB_FHSIZE(sb) = sizeof(unsigned short) + NFS3_FHSIZE;
+		version = 3;
+		if (data->version < 4) {
+			printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n");
+			goto failure_unlock;
+		}
+#else
+		printk(KERN_NOTICE "NFS: NFSv3 not supported.\n");
+		goto failure_unlock;
+#endif
+	} else {
+		server->rpc_ops = &nfs_v2_clientops;
+		NFS_SB_FHSIZE(sb) = sizeof(unsigned short) + NFS2_FHSIZE;
+		version = 2;
+	}
+
 	/* Which protocol do we use? */
 	tcp   = (data->flags & NFS_MOUNT_TCP);
 
@@ -267,11 +411,18 @@
 	timeparms.to_maxval  = tcp? RPC_MAX_TCP_TIMEOUT : RPC_MAX_UDP_TIMEOUT;
 	timeparms.to_exponential = 1;
 
+	if (!timeparms.to_initval)
+		timeparms.to_initval = 7 * HZ / 10;
+	if (!timeparms.to_retries)
+		timeparms.to_retries = 3;
+
 	/* Now create transport and client */
 	xprt = xprt_create_proto(tcp? IPPROTO_TCP : IPPROTO_UDP,
 						&srvaddr, &timeparms);
-	if (xprt == NULL)
-		goto out_no_xprt;
+	if (xprt == NULL) {
+		printk(KERN_NOTICE "NFS: cannot create RPC transport. \n");
+		goto failure_unlock;
+	}
 
 	/* Choose authentication flavor */
 	authflavor = RPC_AUTH_UNIX;
@@ -281,9 +432,11 @@
 		authflavor = RPC_AUTH_KRB;
 
 	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
-						NFS_VERSION, authflavor);
-	if (clnt == NULL)
-		goto out_no_client;
+						version, authflavor);
+	if (clnt == NULL) {
+		printk(KERN_NOTICE "NFS: cannot create RPC client \n");
+		goto failure_unlock;
+	}
 
 	clnt->cl_intr     = (data->flags & NFS_MOUNT_INTR)? 1 : 0;
 	clnt->cl_softrtry = (data->flags & NFS_MOUNT_SOFT)? 1 : 0;
@@ -291,29 +444,78 @@
 	server->client    = clnt;
 
 	/* Fire up rpciod if not yet running */
-	if (rpciod_up() != 0)
-		goto out_no_iod;
+	if (rpciod_up() != 0) {
+		printk(KERN_NOTICE "NFS: cannot start rpciod!\n");
+		goto failure_unlock;
+	}
 
 	/*
 	 * Keep the super block locked while we try to get 
 	 * the root fh attributes.
 	 */
-	root_fh = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
-	if (!root_fh)
+	root_d_fsdata = kmalloc(sizeof(*root_d_fsdata), GFP_KERNEL);
+	if (!root_d_fsdata)
 		goto out_no_fh;
-	*root_fh = data->root;
 
-	if (nfs_proc_getattr(server, root_fh, &fattr) != 0)
-		goto out_no_fattr;
+	memset(root_d_fsdata, 0, sizeof(*root_d_fsdata));
+	root_fh = &root_d_fsdata->fh;
+	memcpy((u8*)root_fh, (u8*)root, sizeof(*root));
+
+	if (! (root_inode = nfs_get_root(sb, root)))
+		goto failure_put_root;
+
+	if (! (sb->s_root = d_alloc_root(root_inode, NULL)))
+		goto failure_put_root;
 
-	root_inode = __nfs_fhget(sb, &fattr);
-	if (!root_inode)
-		goto out_no_root;
-	sb->s_root = d_alloc_root(root_inode, NULL);
-	if (!sb->s_root)
-		goto out_no_root;
 	sb->s_root->d_op = &nfs_dentry_operations;
-	sb->s_root->d_fsdata = root_fh;
+	sb->s_root->d_fsdata = root_d_fsdata;
+	sb->u.nfs_sb.s_root = root_fh;
+
+	/* Get some general file system info */
+	if (server->rpc_ops->statfs(server, root, &fsinfo) >= 0) {
+		if (server->namelen == 0)
+			server->namelen = fsinfo.namelen;
+	} else {
+		printk(KERN_NOTICE "NFS: cannot retrieve file system info.\n");
+		goto failure_put_root;
+	}
+
+	/* Fire up the writeback cache */
+	if (nfs_reqlist_alloc(server) < 0) {
+		printk(KERN_NOTICE "NFS: cannot initialize writeback cache.\n");
+                goto failure_put_root;
+	}
+
+	if (data->rsize == 0)
+		server->rsize = nfs_block_size(fsinfo.rtpref, NULL);
+	if (data->wsize == 0)
+		server->wsize = nfs_block_size(fsinfo.wtpref, NULL);
+
+	server->dtsize = nfs_block_size(fsinfo.dtpref, NULL);
+
+	/* NFSv3: we don't have bsize, but rather rtmult and wtmult... */
+	if (!fsinfo.bsize)
+		fsinfo.bsize = (fsinfo.rtmult>fsinfo.wtmult) ? fsinfo.rtmult : fsinfo.wtmult;
+	/* Also make sure we don't go below rsize/wsize since
+	 * RPC calls are expensive */
+	if (fsinfo.bsize < server->rsize)
+		fsinfo.bsize = server->rsize;
+	if (fsinfo.bsize < server->wsize)
+		fsinfo.bsize = server->wsize;
+
+	if (data->bsize == 0)
+		sb->s_blocksize = nfs_block_bits(fsinfo.bsize, &sb->s_blocksize_bits);
+	if (server->rsize > fsinfo.rtmax)
+		server->rsize = fsinfo.rtmax;
+	if (server->wsize > fsinfo.wtmax)
+		server->wsize = fsinfo.wtmax;
+	server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE;
+	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE;
+
+	maxlen = (version == 2) ? NFS2_MAXNAMLEN : NFS3_MAXNAMLEN;
+
+	if (server->namelen == 0 || server->namelen > maxlen)
+		server->namelen = maxlen;
 
 	/* We're airborne */
 	unlock_super(sb);
@@ -326,47 +528,26 @@
 	return sb;
 
 	/* Yargs. It didn't work out. */
-out_no_root:
-	printk("nfs_read_super: get root inode failed\n");
-	iput(root_inode);
-	goto out_free_fh;
-
-out_no_fattr:
-	printk("nfs_read_super: get root fattr failed\n");
-out_free_fh:
-	kfree(root_fh);
-out_no_fh:
+ failure_put_root:
+	if (root_inode)
+		iput(root_inode);
+	if (root_d_fsdata)
+		kfree(root_d_fsdata);
+ out_no_fh:
 	rpciod_down();
-	goto out_shutdown;
-
-out_no_iod:
-	printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
-out_shutdown:
-	rpc_shutdown_client(server->client);
-	goto out_free_host;
-
-out_no_client:
-	printk(KERN_WARNING "NFS: cannot create RPC client.\n");
-	xprt_destroy(xprt);
-	goto out_free_host;
-
-out_no_xprt:
-	printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
 
-out_free_host:
-	kfree(server->hostname);
-out_unlock:
+ failure_unlock:
+	/* Release the RPC client if one was created, else the bare transport. */
+	if (clnt)
+		rpc_shutdown_client(server->client);
+	else if (xprt)
+		xprt_destroy(xprt);
 	unlock_super(sb);
-	goto out_fail;
-
-out_no_remote:
-	printk("NFS: mount program didn't pass remote address!\n");
-	goto out_fail;
-
-out_miss_args:
-	printk("nfs_read_super: missing data argument\n");
+	if (server->hostname)
+		kfree(server->hostname);
+	printk(KERN_NOTICE "NFS: mount failed.\n");	/* generic: every failure_unlock path ends here */
 
-out_fail:
+failure:
 	sb->s_dev = 0;
 	MOD_DEC_USE_COUNT;
 	return NULL;
@@ -375,27 +556,51 @@
 static int
 nfs_statfs(struct super_block *sb, struct statfs *buf, int bufsiz)
 {
-	int error;
-	struct nfs_fsinfo res;
-	struct statfs tmp;
+	struct nfs_sb_info	*si = &sb->u.nfs_sb;
+	struct nfs_server	*server = &si->s_server;
+	unsigned char		blockbits;	/* log2 of the reported block size */
+	unsigned long		blockres;	/* block size - 1, used for rounding up */
+	int			error;
+	struct nfs_fsinfo	res;
+	struct statfs		tmp;
 
-	error = nfs_proc_statfs(&sb->u.nfs_sb.s_server, &sb->u.nfs_sb.s_root,
-		&res);
+	error = server->rpc_ops->statfs(server, NFS_FH(sb->s_root), &res);	/* per-protocol statfs op on the root handle */
 	if (error) {
-		printk("nfs_statfs: statfs error = %d\n", -error);
-		res.bsize = res.blocks = res.bfree = res.bavail = 0;
+		printk(KERN_NOTICE "nfs_statfs: statfs error = %d\n", -error);
+		memset(&res, 0, sizeof(res));	/* report zeroed stats; the RPC error itself is not returned */
 	}
 	tmp.f_type = NFS_SUPER_MAGIC;
-	tmp.f_bsize = res.bsize;
-	tmp.f_blocks = res.blocks;
-	tmp.f_bfree = res.bfree;
-	tmp.f_bavail = res.bavail;
-	tmp.f_files = 0;
-	tmp.f_ffree = 0;
-	tmp.f_namelen = NAME_MAX;
+	if (res.bsize == 0)
+		res.bsize = sb->s_blocksize;	/* fall back to the superblock's block size */
+	if (res.namelen == 0)
+		res.namelen = server->namelen;	/* fall back to the value stored at mount time */
+	tmp.f_bsize   = nfs_block_bits(res.bsize, &blockbits);	/* also yields blockbits */
+	blockres = (1 << blockbits) - 1;
+	tmp.f_blocks  = (res.tbytes + blockres) >> blockbits;	/* byte counts -> blocks, rounded up */
+	tmp.f_bfree   = (res.fbytes + blockres) >> blockbits;
+	tmp.f_bavail  = (res.abytes + blockres) >> blockbits;
+	tmp.f_files   = res.tfiles;
+	tmp.f_ffree   = res.ffiles;
+	tmp.f_namelen = res.namelen;
 	return copy_to_user(buf, &tmp, bufsiz) ? -EFAULT : 0;
 }
 
+#if 0
+int nfs_remountfs(struct super_block *sb, int *flags, char *data)	/* compiled out by the enclosing #if 0 */
+{
+	struct nfs_server *server = &sb->u.nfs_sb.s_server;
+
+	if (*flags & ~(NFS_MOUNT_NONLM|MS_RDONLY))	/* any other flag bit is rejected */
+		return -EINVAL;
+
+	if (*flags & ~NFS_MOUNT_NONLM)	/* after the check above, only MS_RDONLY can be set here */
+		return 0;
+
+	if ((*flags & NFS_MOUNT_NONLM) == (server->flags & NFS_MOUNT_NONLM))	/* NONLM setting unchanged */
+		return 0;
+}	/* NOTE(review): when the NONLM setting differs, control falls off the end of this int function with no return value */
+#endif
+
 /*
  * Free all unused dentries in an inode's alias list.
  *
@@ -417,11 +622,11 @@
 	unhashed = 0;
 	while ((tmp = tmp->next) != head) {
 		struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
-		if (!list_empty(&dentry->d_subdirs))
-			shrink_dcache_parent(dentry);
 		dprintk("nfs_free_dentries: found %s/%s, d_count=%d, hashed=%d\n",
 			dentry->d_parent->d_name.name, dentry->d_name.name,
 			dentry->d_count, !list_empty(&dentry->d_hash));
+		if (!list_empty(&dentry->d_subdirs))
+			shrink_dcache_parent(dentry);
 		if (!dentry->d_count) {
 			dget(dentry);
 			d_drop(dentry);
@@ -434,33 +639,37 @@
 	return unhashed;
 }
 
+spinlock_t nfs_zap_inode_lock = SPIN_LOCK_UNLOCKED;
 /*
- * Invalidate the local caches
+ * Zap the caches.
  */
-static void
-nfs_zap_caches(struct inode *inode)
+int nfs_zap_caches(struct inode *inode)
 {
+	int	error = 0;
+
 	NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
-	NFS_CACHEINV(inode);
+	NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
+
+	if (S_ISREG(inode->i_mode))
+		error = nfs_invalidate_pages(inode);
+	spin_lock(&nfs_zap_inode_lock);
+	invalidate_inode_pages(inode);
+	spin_unlock(&nfs_zap_inode_lock);
 
-	if (S_ISDIR(inode->i_mode))
-		nfs_invalidate_dircache(inode);
-	else
-		invalidate_inode_pages(inode);
+	memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
+	NFS_CACHEINV(inode);
+	return error;
 }
 
-/*
- * Invalidate, but do not unhash, the inode
- */
 static void
 nfs_invalidate_inode(struct inode *inode)
 {
 	umode_t save_mode = inode->i_mode;
 
+	remove_inode_hash(inode);
+	nfs_zap_caches(inode);
 	make_bad_inode(inode);
 	inode->i_mode = save_mode;
-	nfs_inval(inode);
-	nfs_zap_caches(inode);
 }
 
 /*
@@ -494,25 +703,147 @@
 		/*
 		 * Preset the size and mtime, as there's no need
 		 * to invalidate the caches.
-		 */ 
-		inode->i_size  = fattr->size;
-		inode->i_mtime = fattr->mtime.seconds;
-		NFS_OLDMTIME(inode) = fattr->mtime.seconds;
+		 */
+		inode->i_size  = nfs_size_to_off_t(fattr->size);
+		inode->i_mtime = nfs_time_to_secs(fattr->mtime);
+		inode->i_atime = nfs_time_to_secs(fattr->atime);
+		inode->i_ctime = nfs_time_to_secs(fattr->ctime);
+		NFS_CACHE_CTIME(inode) = fattr->ctime;
+		NFS_CACHE_MTIME(inode) = fattr->mtime;
+		NFS_CACHE_ATIME(inode) = fattr->atime;
+		NFS_CACHE_ISIZE(inode) = fattr->size;
+		NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
+		NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
 	}
 	nfs_refresh_inode(inode, fattr);
 }
 
+static struct inode *
+nfs_make_new_inode(struct super_block *sb, struct nfs_fattr *fattr)
+{
+	struct inode *inode = get_empty_inode();
+
+	if (!inode)
+		return NULL;	
+	inode->i_sb = sb;
+	inode->i_dev = sb->s_dev;
+	inode->i_flags = 0;
+	inode->i_ino = nfs_fattr_to_ino_t(fattr);
+	nfs_read_inode(inode);
+	NFS_FILEID(inode) = fattr->fileid;
+	NFS_FSID(inode) = fattr->fsid;
+	INIT_LIST_HEAD(&inode->u.nfs_i.i_alias64);
+	nfs_fill_inode(inode, fattr);
+	return inode;
+}
+
 /*
- * The following may seem pretty minimal, but the stateless nature
- * of NFS means that we can't do too much more. Previous attempts to use
- * fattr->nlink to determine how well the cached state matches the
- * server suffer from races with stale dentries. You also risk killing
- * off processes by just doing 'mv file newdir' on the server.
+ * Traverse the chain of inodes looking for a match
+ */
+static struct inode *
+nfs_find_inode(struct inode *inode, __u64 fileid, __u64 fsid)
+{
+	struct list_head *tmp, *head;
+	struct inode *tmp_inode;
+
+	/* The inode we were handed may itself be the one we want. */
+	if (NFS_FSID(inode) == fsid && NFS_FILEID(inode) == fileid)
+		return inode;
+
+	tmp = head = &inode->u.nfs_i.i_alias64;
+	while ((tmp = tmp->next) != head) {
+		tmp_inode = list_entry(tmp, struct inode, u.nfs_i.i_alias64);
+		if (NFS_FSID(tmp_inode) != fsid)
+			continue;
+		if (NFS_FILEID(tmp_inode) != fileid)
+			continue;
+		/* Never hand back an alias that is being freed ... */
+		if (tmp_inode->i_state & I_FREEING)
+			continue;
+		/* ... or one that has itself been unhashed. */
+		if (list_empty(&tmp_inode->i_hash))
+			continue;
+
+		return tmp_inode;
+	}
+	return NULL;
+}
+
+/*
+ * In NFSv3 we can have 64bit inode numbers. In order to support
+ * this, and re-exported directories (also seen in NFSv2)
+ * we are forced to allow 2 different inodes to have the same
+ * i_ino. We therefore set up a ring of 'aliased' inodes,
+ * that are labelled by the NFS_FILEID(inode), NFS_FSID(inode).
+ *
+ * All inodes are contained in the normal hash table, so 'iget' will
+ * find some inode in the ring.
  *
- * FIXME: Of course, if 2 exported files have the same fileid (but
- * different fsid which makes it legal) you're still buggered...
- *                                      Trond, August 1999.
  */
+static struct inode *
+nfs_iget(struct super_block *sb, struct nfs_fattr *fattr)
+{
+	struct inode *inode = NULL;
+	struct inode *free_inode = NULL;
+	__u64 fileid = fattr->fileid;
+	__u64 fsid = fattr->fsid;
+
+	inode = iget(sb, nfs_fattr_to_ino_t(fattr));
+
+	if (!inode)
+		return NULL;
+
+	/* Do we have a new inode? */
+	spin_lock(&nfs_inode_lock);
+	if (!NFS_FSID(inode) && !NFS_FILEID(inode)) {
+		NFS_FSID(inode) = fsid;
+		NFS_FILEID(inode) = fileid;
+		INIT_LIST_HEAD(&inode->u.nfs_i.i_alias64);
+	}
+	spin_unlock(&nfs_inode_lock);
+
+	/* A chain of aliased inodes exists. Find the right one... */
+	while (NFS_FSID(inode) != fsid || NFS_FILEID(inode) != fileid) {
+		struct inode *new_inode;
+
+		spin_lock(&nfs_inode_lock);
+		new_inode = nfs_find_inode(inode, fileid, fsid);
+
+		if (new_inode) {
+			spin_unlock(&nfs_inode_lock);
+			new_inode = igrab(new_inode);
+			if (new_inode) {
+				iput(inode);
+				inode = new_inode;
+			}
+			continue;
+		}
+
+		if (free_inode) {
+			/* Note ordering of the next 4 lines */
+			list_add(&free_inode->u.nfs_i.i_alias64, &inode->u.nfs_i.i_alias64);
+			spin_unlock(&nfs_inode_lock);
+			insert_inode_hash(free_inode);
+			iput(inode);
+			inode = free_inode;
+			free_inode = NULL;
+			break;
+		}
+		spin_unlock(&nfs_inode_lock);
+
+		free_inode = nfs_make_new_inode(sb, fattr);
+		if (!free_inode) {
+			iput(inode);
+			inode = NULL;
+			break;
+		}
+	}
+
+	if (free_inode)
+		iput(free_inode);
+	return inode;
+}
+
 static int
 nfs_inode_is_stale(struct inode *inode, struct nfs_fattr *fattr)
 {
@@ -527,16 +858,16 @@
 		is_stale = 1;
 
 	/*
-	 * Free up unused cached dentries to see if it's wise to unhash
-	 * the inode (which we can do if all the dentries have been unhashed).
+	 * If the inode seems stale, free up cached dentries.
 	 */
 	unhashed = nfs_free_dentries(inode);
 
-	/* Assume we're holding 1 lock on the inode from 'iget'
+	/* Assume we're holding an i_count
 	 *
 	 * NB: sockets sometimes have volatile file handles
 	 *     don't invalidate their inodes even if all dentries are
-	 *     unhashed. */
+	 *     unhashed.
+	 */
 	if (unhashed && inode->i_count == unhashed + 1
 	    && !S_ISSOCK(inode->i_mode) && !S_ISFIFO(inode->i_mode))
 		is_stale = 1;
@@ -559,12 +890,12 @@
 {
 	struct super_block *sb = dentry->d_sb;
 
-	dprintk("NFS: nfs_fhget(%s/%s fileid=%d)\n",
+	dprintk("NFS: nfs_fhget(%s/%s fileid=%lu)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name,
-		fattr->fileid);
+		(unsigned long) fattr->fileid);
 
 	/* Install the file handle in the dentry */
-	*((struct nfs_fh *) dentry->d_fsdata) = *fhandle;
+	memcpy(NFS_FH(dentry), (u8*)fhandle, sizeof(*fhandle));
 
 #ifdef CONFIG_NFS_SNAPSHOT
 	/*
@@ -574,22 +905,16 @@
 	if ((dentry->d_parent->d_inode->u.nfs_i.flags & NFS_IS_SNAPSHOT) ||
 	    (dentry->d_name.len == 9 &&
 	     memcmp(dentry->d_name.name, ".snapshot", 9) == 0)) {
-		struct inode *inode = get_empty_inode();
+		struct inode *inode = nfs_make_new_inode(sb, fattr);
 		if (!inode)
-			goto out;	
-		inode->i_sb = sb;
-		inode->i_dev = sb->s_dev;
-		inode->i_flags = 0;
-		inode->i_ino = fattr->fileid;
-		nfs_read_inode(inode);
-		nfs_fill_inode(inode, fattr);
+			goto out;
 		inode->u.nfs_i.flags |= NFS_IS_SNAPSHOT;
 		dprintk("NFS: nfs_fhget(snapshot ino=%ld)\n", inode->i_ino);
 	out:
 		return inode;
 	}
 #endif
-	return __nfs_fhget(sb, fattr);
+	return __nfs_fhget(sb, fhandle, fattr);
 }
 
 /*
@@ -601,29 +926,38 @@
  * server has reused a fileid (i_ino) and we have a stale inode.
  */
 static struct inode *
-__nfs_fhget(struct super_block *sb, struct nfs_fattr *fattr)
+__nfs_fhget(struct super_block *sb, struct nfs_fh* fhandle,
+	                           struct nfs_fattr *fattr)
 {
 	struct inode *inode = NULL;
 
-	if (!fattr)
+	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
 		goto out_no_inode;
 
-	while (!inode) {
-		inode = iget(sb, fattr->fileid);
-		if (!inode)
-			goto out_no_inode;
-		/* N.B. This should be impossible ... */
-		if (inode->i_ino != fattr->fileid)
-			goto out_bad_id;
+	while((inode = nfs_iget(sb, fattr)) != NULL) {
 
+		/*
+		 * Check for busy inodes, and attempt to get rid of any
+		 * unused local references. If successful, we release the
+		 * inode and try again.
+		 *
+		 * Note that the busy test uses the values in the fattr,
+		 * as the inode may have become a different object.
+		 * (We can probably handle mode changes here, too.)
+		 */
 		if (!nfs_inode_is_stale(inode,fattr))
 			break;
 
+		dprintk("__nfs_fhget: inode %ld still busy, i_count=%d\n",
+		       inode->i_ino, inode->i_count);
 		remove_inode_hash(inode);
-		nfs_invalidate_inode(inode);
+		nfs_zap_caches(inode);
 		iput(inode);
-		inode = NULL;
 	}
+
+	if (!inode)
+		goto out_no_inode;
+
 	nfs_fill_inode(inode, fattr);
 	dprintk("NFS: __nfs_fhget(%x/%ld ct=%d)\n",
 		inode->i_dev, inode->i_ino, inode->i_count);
@@ -632,10 +966,7 @@
 	return inode;
 
 out_no_inode:
-	printk("__nfs_fhget: iget failed\n");
-	goto out;
-out_bad_id:
-	printk("__nfs_fhget: unexpected inode from iget\n");
+	printk(KERN_NOTICE "__nfs_fhget: iget failed\n");
 	goto out;
 }
 
@@ -643,9 +974,8 @@
 nfs_notify_change(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = dentry->d_inode;
-	int error;
-	struct nfs_sattr sattr;
 	struct nfs_fattr fattr;
+	int              error;
 
 	/*
 	 * Make sure the inode is up-to-date.
@@ -653,88 +983,74 @@
 	error = nfs_revalidate(dentry);
 	if (error) {
 #ifdef NFS_PARANOIA
-printk("nfs_notify_change: revalidate failed, error=%d\n", error);
+		printk(KERN_DEBUG "nfs_notify_change: revalidate failed, error=%d\n", error);
 #endif
 		goto out;
 	}
 
-	sattr.mode = (u32) -1;
-	if (attr->ia_valid & ATTR_MODE) 
-		sattr.mode = attr->ia_mode;
-
-	sattr.uid = (u32) -1;
-	if (attr->ia_valid & ATTR_UID)
-		sattr.uid = attr->ia_uid;
-
-	sattr.gid = (u32) -1;
-	if (attr->ia_valid & ATTR_GID)
-		sattr.gid = attr->ia_gid;
-
-	sattr.size = (u32) -1;
-	if ((attr->ia_valid & ATTR_SIZE) && S_ISREG(inode->i_mode))
-		sattr.size = attr->ia_size;
-
-	sattr.mtime.seconds = sattr.mtime.useconds = (u32) -1;
-	if (attr->ia_valid & ATTR_MTIME) {
-		sattr.mtime.seconds = attr->ia_mtime;
-		sattr.mtime.useconds = 0;
-	}
-
-	sattr.atime.seconds = sattr.atime.useconds = (u32) -1;
-	if (attr->ia_valid & ATTR_ATIME) {
-		sattr.atime.seconds = attr->ia_atime;
-		sattr.atime.useconds = 0;
-	}
+	if (!S_ISREG(inode->i_mode))
+		attr->ia_valid &= ~ATTR_SIZE;
 
 	error = nfs_wb_all(inode);
-	if (error)
+	if (error < 0)
 		goto out;
 
-	error = nfs_proc_setattr(NFS_DSERVER(dentry), NFS_FH(dentry),
-				&sattr, &fattr);
-	if (error)
+	/* Now perform the setattr call */
+	error = NFS_CALL(setattr, inode, (dentry, &fattr, attr));
+	if (error || !(fattr.valid & NFS_ATTR_FATTR)) {
+		nfs_zap_caches(inode);
 		goto out;
+	}
 	/*
 	 * If we changed the size or mtime, update the inode
 	 * now to avoid invalidating the page cache.
 	 */
-	if (sattr.size != (u32) -1) {
-		if (sattr.size != fattr.size)
-			printk("nfs_notify_change: sattr=%d, fattr=%d??\n",
-				sattr.size, fattr.size);
-		inode->i_size  = sattr.size;
-		inode->i_mtime = fattr.mtime.seconds;
+	if (!(fattr.valid & NFS_ATTR_WCC)) {
+		fattr.pre_size = NFS_CACHE_ISIZE(inode);
+		fattr.pre_mtime = NFS_CACHE_MTIME(inode);
+		fattr.pre_ctime = NFS_CACHE_CTIME(inode);
+		fattr.valid |= NFS_ATTR_WCC;
 	}
-	if (sattr.mtime.seconds != (u32) -1)
-		inode->i_mtime = fattr.mtime.seconds;
 	error = nfs_refresh_inode(inode, &fattr);
 out:
 	return error;
 }
 
-/*
- * Externally visible revalidation function
- */
 int
-nfs_revalidate(struct dentry *dentry)
+nfs_update_atime(struct dentry *dentry)
 {
-	return nfs_revalidate_inode(NFS_DSERVER(dentry), dentry);
+	struct iattr attr;
+	struct inode *inode = dentry->d_inode;
+
+	nfs_revalidate(dentry);
+	if (!inode || time_before(inode->i_atime,nfs_time_to_secs(NFS_CACHE_ATIME(inode))))
+		return 0;
+
+	attr.ia_valid = ATTR_ATIME|ATTR_ATIME_SET;
+	attr.ia_atime = inode->i_atime;
+	return nfs_notify_change(dentry, &attr);
 }
 
 /*
- * These are probably going to contain hooks for
- * allocating and releasing RPC credentials for
- * the file. I'll have to think about Tronds patch
- * a bit more..
+ * Wait for the inode to get unlocked.
+ * (Used for NFS_INO_LOCKED and NFS_INO_REVALIDATING).
  */
-int nfs_open(struct inode *inode, struct file *filp)
+int
+nfs_wait_on_inode(struct inode *inode, int flag)
 {
-	return 0;
+	struct rpc_clnt		*clnt = NFS_CLIENT(inode);
+	if (!(NFS_FLAGS(inode) & flag))
+		return 0;
+	return nfs_wait_event(clnt, inode->i_wait, !(NFS_FLAGS(inode) & flag));
 }
 
-int nfs_release(struct inode *inode, struct file *filp)
+/*
+ * Externally visible revalidation function
+ */
+int
+nfs_revalidate(struct dentry *dentry)
 {
-	return 0;
+	return nfs_revalidate_inode(dentry);
 }
 
 /*
@@ -742,25 +1058,44 @@
  * the cached attributes have to be refreshed.
  */
 int
-_nfs_revalidate_inode(struct nfs_server *server, struct dentry *dentry)
+__nfs_revalidate_inode(struct dentry *dentry)
 {
 	struct inode	*inode = dentry->d_inode;
-	int		 status = 0;
 	struct nfs_fattr fattr;
+	int		 status = 0;
 
 	dfprintk(PAGECACHE, "NFS: revalidating %s/%s, ino=%ld\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name,
 		inode->i_ino);
-	status = nfs_proc_getattr(server, NFS_FH(dentry), &fattr);
+
+	if (!inode || is_bad_inode(inode))
+		return -ESTALE;
+
+	while (NFS_REVALIDATING(inode)) {
+		status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING);
+		if (status < 0)
+			return status;
+		if (time_before(jiffies,NFS_READTIME(inode)+NFS_ATTRTIMEO(inode)))
+			return 0;
+	}
+	NFS_FLAGS(inode) |= NFS_INO_REVALIDATING;
+
+	status = NFS_CALL(getattr, inode, (dentry, &fattr));
 	if (status) {
 		int error;
 		u32 *fh;
+		struct dentry *dir = dentry->d_parent;
 		struct nfs_fh fhandle;
+		struct nfs_fattr dir_attr;
+
 		dfprintk(PAGECACHE, "nfs_revalidate_inode: %s/%s getattr failed, ino=%ld, error=%d\n",
-			dentry->d_parent->d_name.name,
-			dentry->d_name.name, inode->i_ino, status);
+		       dentry->d_parent->d_name.name, dentry->d_name.name,
+		       inode->i_ino, status);
+		nfs_zap_caches(inode);
+
 		if (status != -ESTALE)
 			goto out;
+
 		/*
 		 * A "stale filehandle" error ... show the current fh
 		 * and find out what the filehandle should be.
@@ -768,8 +1103,9 @@
 		fh = (u32 *) NFS_FH(dentry);
 		dfprintk(PAGECACHE, "NFS: bad fh %08x%08x%08x%08x%08x%08x%08x%08x\n",
 			fh[0],fh[1],fh[2],fh[3],fh[4],fh[5],fh[6],fh[7]);
-		error = nfs_proc_lookup(server, NFS_FH(dentry->d_parent), 
-					dentry->d_name.name, &fhandle, &fattr);
+		error = NFS_CALL(lookup, dir->d_inode, (dir, &dir_attr, 
+					&dentry->d_name, &fhandle, &fattr));
+		nfs_refresh_inode(dir->d_inode, &dir_attr);
 		if (error) {
 			dfprintk(PAGECACHE, "NFS: lookup failed, error=%d\n", error);
 			goto out;
@@ -785,13 +1121,16 @@
 	status = nfs_refresh_inode(inode, &fattr);
 	if (status) {
 		dfprintk(PAGECACHE, "nfs_revalidate_inode: %s/%s refresh failed, ino=%ld, error=%d\n",
-			dentry->d_parent->d_name.name,
-			dentry->d_name.name, inode->i_ino, status);
+			 dentry->d_parent->d_name.name, dentry->d_name.name,
+			 inode->i_ino, status);
 		goto out;
 	}
+
 	dfprintk(PAGECACHE, "NFS: %s/%s revalidation complete\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 out:
+	NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING;
+	wake_up(&inode->i_wait);
 	return status;
 }
 
@@ -800,29 +1139,54 @@
  * an operation.  Here we update the inode to reflect the state
  * of the server's inode.
  *
- * This is a bit tricky because we have to make sure all dirty pages
- * have been sent off to the server before calling invalidate_inode_pages.
- * To make sure no other process adds more write requests while we try
- * our best to flush them, we make them sleep during the attribute refresh.
+ * If we have reason to believe that any data we cached has become
+ * invalid, we schedule it to be flushed on the next occasion
+ * (i.e. when nfs_revalidate_inode is called).
  *
- * A very similar scenario holds for the dir cache.
+ * The reason we don't do it here is because nfs_refresh_inode can
+ * be called outside of the process context, e.g. from nfs_readpage_result,
+ * which is invoked by rpciod.
  */
 int
 nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
-	int invalid = 0;
-	int error = -EIO;
-
-	dfprintk(VFS, "NFS: refresh_inode(%x/%ld ct=%d)\n",
-		 inode->i_dev, inode->i_ino, inode->i_count);
+	__u64		new_size, new_mtime;	/* raw fattr values, kept untruncated */
+	off_t		new_isize;		/* fattr->size after nfs_size_to_off_t() */
+	int		invalid = 0;
+	int		error = -EIO;
 
 	if (!inode || !fattr) {
-		printk("nfs_refresh_inode: inode or fattr is NULL\n");
+		printk(KERN_ERR "nfs_refresh_inode: inode or fattr is NULL\n");
 		goto out;
 	}
-	if (inode->i_ino != fattr->fileid) {
-		printk("nfs_refresh_inode: mismatch, ino=%ld, fattr=%d\n",
-			inode->i_ino, fattr->fileid);
+	if (inode->i_mode == 0) {
+		printk(KERN_ERR "nfs_refresh_inode: empty inode\n");
+		goto out;
+	}
+
+	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
+		goto out;
+
+	if (is_bad_inode(inode))
+		goto out;
+
+	dfprintk(VFS, "NFS: refresh_inode(%x/%ld ct=%d info=0x%x)\n",
+			inode->i_dev, inode->i_ino, inode->i_count,
+			fattr->valid);
+
+
+	if (NFS_FSID(inode) != fattr->fsid ||
+	    NFS_FILEID(inode) != fattr->fileid) {
+		printk(KERN_ERR "nfs_refresh_inode: inode number mismatch\n"
+		       "expected (0x%lx%08lx/0x%lx%08lx), got (0x%lx%08lx/0x%lx%08lx)\n",
+		       (unsigned long) (NFS_FSID(inode)>>32),
+		       (unsigned long) (NFS_FSID(inode) & 0xFFFFFFFFUL),
+		       (unsigned long) (NFS_FILEID(inode)>>32),
+		       (unsigned long) (NFS_FILEID(inode) & 0xFFFFFFFFUL),
+		       (unsigned long) (fattr->fsid >> 32),
+		       (unsigned long) (fattr->fsid & 0xFFFFFFFFUL),
+		       (unsigned long) (fattr->fileid >> 32),
+		       (unsigned long) (fattr->fileid & 0xFFFFFFFFUL));
 		goto out;
 	}
 
@@ -832,54 +1196,102 @@
 	if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
 		goto out_changed;
 
-	inode->i_mode = fattr->mode;
-	inode->i_nlink = fattr->nlink;
-	inode->i_uid = fattr->uid;
-	inode->i_gid = fattr->gid;
+ 	new_mtime = fattr->mtime;
+	new_size = fattr->size;
+ 	new_isize = nfs_size_to_off_t(fattr->size);
 
-	inode->i_blocks = fattr->blocks;
-	inode->i_atime = fattr->atime.seconds;
-	inode->i_ctime = fattr->ctime.seconds;
+	error = 0;
 
 	/*
 	 * Update the read time so we don't revalidate too often.
 	 */
 	NFS_READTIME(inode) = jiffies;
-	error = 0;
 
 	/*
-	 * If we have pending write-back entries, we don't want
-	 * to look at the size or the mtime the server sends us
-	 * too closely, as we're in the middle of modifying them.
+	 * Note: NFS_CACHE_ISIZE(inode) reflects the state of the cache.
+	 *       NOT inode->i_size!!!
 	 */
-	if (NFS_WRITEBACK(inode))
-		goto out;
-
-	if (inode->i_size != fattr->size) {
+	if (NFS_CACHE_ISIZE(inode) != new_size) {
 #ifdef NFS_DEBUG_VERBOSE
-printk("NFS: size change on %x/%ld\n", inode->i_dev, inode->i_ino);
+		printk(KERN_DEBUG "NFS: isize change on %x/%ld\n", inode->i_dev, inode->i_ino);
 #endif
-		inode->i_size = fattr->size;
 		invalid = 1;
 	}
 
-	if (inode->i_mtime != fattr->mtime.seconds) {
+	/*
+	 * Note: we don't check inode->i_mtime since pipes etc.
+	 *       can change this value in VFS without requiring a
+	 *	 cache revalidation.
+	 */
+	if (NFS_CACHE_MTIME(inode) != new_mtime) {
 #ifdef NFS_DEBUG_VERBOSE
-printk("NFS: mtime change on %x/%ld\n", inode->i_dev, inode->i_ino);
+		printk(KERN_DEBUG "NFS: mtime change on %x/%ld\n", inode->i_dev, inode->i_ino);
 #endif
-		inode->i_mtime = fattr->mtime.seconds;
 		invalid = 1;
 	}
 
-	if (invalid)
-		goto out_invalid;
+	/* Check Weak Cache Consistency data.
+	 * If size and mtime match the pre-operation values, we can
+	 * assume that any attribute changes were caused by our NFS
+         * operation, so there's no need to invalidate the caches.
+         */
+        if (!S_ISDIR(inode->i_mode)
+	    && (fattr->valid & NFS_ATTR_WCC)
+	    && NFS_CACHE_ISIZE(inode) == fattr->pre_size
+	    && NFS_CACHE_MTIME(inode) == fattr->pre_mtime) {
+		invalid = 0;
+	}
+
+	/*
+	 * If we have pending writebacks, things can get
+	 * messy.
+	 */
+	if (NFS_CLUSTERS(inode) && new_isize < inode->i_size)
+		new_isize = inode->i_size;
+
+	NFS_CACHE_CTIME(inode) = fattr->ctime;
+	inode->i_ctime = nfs_time_to_secs(fattr->ctime);
+	/* If we've been messing around with atime, don't
+	 * update it. Save the server value in NFS_CACHE_ATIME.
+	 */
+	NFS_CACHE_ATIME(inode) = fattr->atime;
+	if (time_before(inode->i_atime, nfs_time_to_secs(fattr->atime)))
+		inode->i_atime = nfs_time_to_secs(fattr->atime);
+
+	NFS_CACHE_MTIME(inode) = new_mtime;
+	inode->i_mtime = nfs_time_to_secs(new_mtime);
 
+	NFS_CACHE_ISIZE(inode) = new_size;
+	inode->i_size = new_isize;
+
+	inode->i_mode = fattr->mode;
+	inode->i_nlink = fattr->nlink;
+	inode->i_uid = fattr->uid;
+	inode->i_gid = fattr->gid;
+
+	if (fattr->valid & NFS_ATTR_FATTR_V3) {
+		/*
+		 * report the blocks in 512byte units
+		 */
+		inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
+		inode->i_blksize = inode->i_sb->s_blocksize;
+ 	} else {
+ 		inode->i_blocks = fattr->du.nfs2.blocks;
+ 		inode->i_blksize = fattr->du.nfs2.blocksize;
+ 	}
+ 	inode->i_rdev = 0;
+ 	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+ 		inode->i_rdev = to_kdev_t(fattr->rdev);
+ 
 	/* Update attrtimeo value */
-	if (fattr->mtime.seconds == NFS_OLDMTIME(inode)) {
+	if (!invalid && time_after(jiffies, NFS_ATTRTIMEO_UPDATE(inode)+NFS_ATTRTIMEO(inode))) {
 		if ((NFS_ATTRTIMEO(inode) <<= 1) > NFS_MAXATTRTIMEO(inode))
 			NFS_ATTRTIMEO(inode) = NFS_MAXATTRTIMEO(inode);
+		NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
 	}
-	NFS_OLDMTIME(inode) = fattr->mtime.seconds;
+
+	if (invalid)
+		nfs_zap_caches(inode);
 
 out:
 	return error;
@@ -889,22 +1301,16 @@
 	 * Big trouble! The inode has become a different object.
 	 */
 #ifdef NFS_PARANOIA
-printk("nfs_refresh_inode: inode %ld mode changed, %07o to %07o\n",
-inode->i_ino, inode->i_mode, fattr->mode);
+	printk(KERN_DEBUG "nfs_refresh_inode: inode %ld mode changed, %07o to %07o\n",
+	       inode->i_ino, inode->i_mode, fattr->mode);
 #endif
 	/*
 	 * No need to worry about unhashing the dentry, as the
 	 * lookup validation will know that the inode is bad.
+	 * (But we fall through to invalidate the caches.)
 	 */
 	nfs_invalidate_inode(inode);
 	goto out;
-
-out_invalid:
-#ifdef NFS_DEBUG_VERBOSE
-printk("nfs_refresh_inode: invalidating %ld pages\n", inode->i_nrpages);
-#endif
-	nfs_zap_caches(inode);
-	goto out;
 }
 
 /*
@@ -924,8 +1330,6 @@
 init_nfs_fs(void)
 {
 #ifdef CONFIG_PROC_FS
-	rpc_register_sysctl();
-	rpc_proc_init();
 	rpc_proc_register(&nfs_rpcstat);
 #endif
         return register_filesystem(&nfs_fs_type);
@@ -953,6 +1357,5 @@
 	rpc_proc_unregister("nfs");
 #endif
 	unregister_filesystem(&nfs_fs_type);
-	nfs_free_dircache();
 }
 #endif
--- linux/fs/nfs/mount_clnt.c.nfsattack-gafton	Tue Jan  4 13:12:23 2000
+++ linux/fs/nfs/mount_clnt.c	Fri Feb  4 23:26:34 2000
@@ -17,17 +17,14 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/xprt.h>
 #include <linux/sunrpc/sched.h>
+#include <linux/nfs2.h>
+#include <linux/nfs3.h>
 #include <linux/nfs_fs.h>
 
 #ifdef RPC_DEBUG
 # define NFSDBG_FACILITY	NFSDBG_ROOT
 #endif
 
-#ifndef MAX
-# define MAX(a, b)	(((a) > (b))? (a) : (b))
-#endif
-
-
 /*
 #define MOUNT_PROGRAM		100005
 #define MOUNT_VERSION		1
@@ -35,7 +32,9 @@
 #define MOUNT_UMNT		3
  */
 
-static struct rpc_clnt *	mnt_create(char *, struct sockaddr_in *);
+static int			nfs_gen_mount(struct sockaddr_in *, char *,
+					      struct nfs_fh *, int);
+static struct rpc_clnt *	mnt_create(char *, struct sockaddr_in *, int);
 extern struct rpc_program	mnt_program;
 
 struct mnt_fhstatus {
@@ -43,30 +42,44 @@
 	struct nfs_fh *		fh;
 };
 
+int
+nfs_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh)
+{
+	return nfs_gen_mount(addr, path, fh, NFS_MNT_VERSION);
+}
+
+int
+nfs3_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh)
+{
+	return nfs_gen_mount(addr, path, fh, NFS_MNT3_VERSION);
+}
+
 /*
  * Obtain an NFS file handle for the given host and path
  */
-int
-nfs_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh)
+static int
+nfs_gen_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, int version)
 {
 	struct rpc_clnt		*mnt_clnt;
 	struct mnt_fhstatus	result = { 0, fh };
 	char			hostname[32];
 	int			status;
+	int			call;
 
 	dprintk("NFS:      nfs_mount(%08x:%s)\n",
 			(unsigned)ntohl(addr->sin_addr.s_addr), path);
 
 	strcpy(hostname, in_ntoa(addr->sin_addr.s_addr));
-	if (!(mnt_clnt = mnt_create(hostname, addr)))
+	if (!(mnt_clnt = mnt_create(hostname, addr, version)))
 		return -EACCES;
 
-	status = rpc_call(mnt_clnt, NFS_MNTPROC_MNT, path, &result, 0);
+	call = (version == 3) ? MOUNTPROC3_MNT : MNTPROC_MNT;
+	status = rpc_call(mnt_clnt, call, path, &result, 0);
 	return status < 0? status : (result.status? -EACCES : 0);
 }
 
 static struct rpc_clnt *
-mnt_create(char *hostname, struct sockaddr_in *srvaddr)
+mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version)
 {
 	struct rpc_xprt	*xprt;
 	struct rpc_clnt	*clnt;
@@ -75,7 +88,7 @@
 		return NULL;
 
 	clnt = rpc_create_client(xprt, hostname,
-				&mnt_program, NFS_MNT_VERSION,
+				&mnt_program, version,
 				RPC_AUTH_NULL);
 	if (!clnt) {
 		xprt_destroy(xprt);
@@ -100,7 +113,7 @@
 static int
 xdr_encode_dirpath(struct rpc_rqst *req, u32 *p, const char *path)
 {
-	p = xdr_encode_string(p, path);
+	p = xdr_encode_string(p, path, -1);
 
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
 	return 0;
@@ -109,14 +122,35 @@
 static int
 xdr_decode_fhstatus(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res)
 {
-	if ((res->status = ntohl(*p++)) == 0)
-		memcpy(res->fh, p, sizeof(*res->fh));
+	if ((res->status = ntohl(*p++)) == 0) {
+		res->fh->size = NFS2_FHSIZE;
+		memcpy(res->fh->data, p, NFS2_FHSIZE);
+	} else	/* wipe only the handle: the caller still tests res->status */
+		memset((u8 *)res->fh, 0, sizeof(*res->fh));
+	return 0;
+}
+
+static int
+xdr_decode_fhstatus3(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res)
+{
+	memset((u8 *)res->fh, 0, sizeof(*res->fh));	/* the handle, not *res */
+	if ((res->status = ntohl(*p++)) == 0) {
+		unsigned int size = ntohl(*p++);	/* unsigned: no negative length */
+		if (size > NFS3_FHSIZE)
+			return -EIO;	/* handle cannot fit: fail the call */
+		res->fh->size = size;
+		memcpy(res->fh->data, p, size);
+	}
 	return 0;
 }
 
 #define MNT_dirpath_sz		(1 + 256)
 #define MNT_fhstatus_sz		(1 + 8)
 
+#ifndef MAX
+# define MAX(a, b)      (((a) > (b))? (a) : (b))
+#endif
+
 static struct rpc_procinfo	mnt_procedures[2] = {
 	{ "mnt_null",
 		(kxdrproc_t) xdr_error,	
@@ -124,16 +158,32 @@
 	{ "mnt_mount",
 		(kxdrproc_t) xdr_encode_dirpath,	
 		(kxdrproc_t) xdr_decode_fhstatus,
-		MAX(MNT_dirpath_sz, MNT_fhstatus_sz)<<2, 0},
+		MAX(MNT_dirpath_sz, MNT_fhstatus_sz) << 2, 0 },
+};
+
+static struct rpc_procinfo mnt3_procedures[2] = {
+	{ "mnt3_null",
+		(kxdrproc_t) xdr_error,
+		(kxdrproc_t) xdr_error, 0, 0 },
+	{ "mnt3_mount",
+		(kxdrproc_t) xdr_encode_dirpath,
+		(kxdrproc_t) xdr_decode_fhstatus3,
+		MAX(MNT_dirpath_sz, MNT_fhstatus_sz) << 2, 0 },
 };
 
 static struct rpc_version	mnt_version1 = {
 	1, 2, mnt_procedures
 };
 
+static struct rpc_version	mnt_version3 = {
+	3, 2, mnt3_procedures
+};
+
 static struct rpc_version *	mnt_version[] = {
 	NULL,
 	&mnt_version1,
+	NULL,
+	&mnt_version3,
 };
 
 static struct rpc_stat		mnt_stats;
--- linux/fs/nfs/nfs2xdr.c.nfsattack-gafton	Sat Mar  6 17:21:13 1999
+++ linux/fs/nfs/nfs2xdr.c	Fri Feb  4 23:26:34 2000
@@ -1,13 +1,15 @@
 /*
- * linux/fs/nfs/xdr.c
+ * linux/fs/nfs/nfs2xdr.c
  *
  * XDR functions to encode/decode NFS RPC arguments and results.
  *
  * Copyright (C) 1992, 1993, 1994  Rick Sladkey
  * Copyright (C) 1996 Olaf Kirch
+ * 04 Aug 1998  Ion Badulescu <ionut@cs.columbia.edu>
+ * 		FIFO's need special handling in NFSv2
  */
 
-#define NFS_NEED_XDR_TYPES
+#define NFS_NEED_NFS2_XDR_TYPES
 
 #include <linux/param.h>
 #include <linux/sched.h>
@@ -20,6 +22,8 @@
 #include <linux/pagemap.h>
 #include <linux/proc_fs.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs2.h>
 #include <linux/nfs_fs.h>
 
 /* Uncomment this to support servers requiring longword lengths */
@@ -28,8 +32,7 @@
 #define NFSDBG_FACILITY		NFSDBG_XDR
 /* #define NFS_PARANOIA 1 */
 
-#define QUADLEN(len)		(((len) + 3) >> 2)
-static int			nfs_stat_to_errno(int stat);
+extern int			nfs_stat_to_errno(int stat);
 
 /* Mapping from NFS error code to "errno" error code. */
 #define errno_NFSERR_IO		EIO
@@ -40,8 +43,8 @@
  */
 #define NFS_fhandle_sz		8
 #define NFS_sattr_sz		8
-#define NFS_filename_sz		1+(NFS_MAXNAMLEN>>2)
-#define NFS_path_sz		1+(NFS_MAXPATHLEN>>2)
+#define NFS_filename_sz		1+(NFS2_MAXNAMLEN>>2)
+#define NFS_path_sz		1+(NFS2_MAXPATHLEN>>2)
 #define NFS_fattr_sz		17
 #define NFS_info_sz		5
 #define NFS_entry_sz		NFS_filename_sz+3
@@ -49,6 +52,7 @@
 #define NFS_enc_void_sz		0
 #define NFS_diropargs_sz	NFS_fhandle_sz+NFS_filename_sz
 #define NFS_sattrargs_sz	NFS_fhandle_sz+NFS_sattr_sz
+#define NFS_readlinkargs_sz	NFS_fhandle_sz
 #define NFS_readargs_sz		NFS_fhandle_sz+3
 #define NFS_writeargs_sz	NFS_fhandle_sz+4
 #define NFS_createargs_sz	NFS_diropargs_sz+NFS_sattr_sz
@@ -60,8 +64,9 @@
 #define NFS_dec_void_sz		0
 #define NFS_attrstat_sz		1+NFS_fattr_sz
 #define NFS_diropres_sz		1+NFS_fhandle_sz+NFS_fattr_sz
-#define NFS_readlinkres_sz	1+NFS_path_sz
+#define NFS_readlinkres_sz	1
 #define NFS_readres_sz		1+NFS_fattr_sz+1
+#define NFS_writeres_sz         NFS_attrstat_sz
 #define NFS_stat_sz		1
 #define NFS_readdirres_sz	1
 #define NFS_statfsres_sz	1+NFS_info_sz
@@ -72,15 +77,19 @@
 static inline u32 *
 xdr_encode_fhandle(u32 *p, struct nfs_fh *fhandle)
 {
-	*((struct nfs_fh *) p) = *fhandle;
-	return p + QUADLEN(sizeof(*fhandle));
+	memcpy(p, fhandle->data, NFS2_FHSIZE);
+	return p + XDR_QUADLEN(NFS2_FHSIZE);
 }
 
 static inline u32 *
 xdr_decode_fhandle(u32 *p, struct nfs_fh *fhandle)
 {
-	*fhandle = *((struct nfs_fh *) p);
-	return p + QUADLEN(sizeof(*fhandle));
+	/* Zero handle first to allow comparisons */
+	memset(fhandle, 0, sizeof(*fhandle));
+	/* NFSv2 handles have a fixed length */
+	fhandle->size = NFS2_FHSIZE;
+	memcpy(fhandle->data, p, NFS2_FHSIZE);
+	return p + XDR_QUADLEN(NFS2_FHSIZE);
 }
 
 static inline u32 *
@@ -91,7 +100,14 @@
 	if (*len > maxlen)
 		return NULL;
 	*string = (char *) p;
-	return p + QUADLEN(*len);
+	return p + XDR_QUADLEN(*len);
+}
+
+static inline u32*
+xdr_decode_time(u32 *p, u64 *timep)
+{
+	*timep = ((u64)ntohl(p[0]) << 32) + (u64)ntohl(p[1]); /* hi word, lo word */
+	return p + 2;	/* advance once, outside the expression */
 }
 
 static inline u32 *
@@ -103,33 +119,51 @@
 	fattr->uid = ntohl(*p++);
 	fattr->gid = ntohl(*p++);
 	fattr->size = ntohl(*p++);
-	fattr->blocksize = ntohl(*p++);
+	fattr->du.nfs2.blocksize = ntohl(*p++);
 	fattr->rdev = ntohl(*p++);
-	fattr->blocks = ntohl(*p++);
+	fattr->du.nfs2.blocks = ntohl(*p++);
 	fattr->fsid = ntohl(*p++);
 	fattr->fileid = ntohl(*p++);
-	fattr->atime.seconds = ntohl(*p++);
-	fattr->atime.useconds = ntohl(*p++);
-	fattr->mtime.seconds = ntohl(*p++);
-	fattr->mtime.useconds = ntohl(*p++);
-	fattr->ctime.seconds = ntohl(*p++);
-	fattr->ctime.useconds = ntohl(*p++);
+	p = xdr_decode_time(p, &fattr->atime);
+	p = xdr_decode_time(p, &fattr->mtime);
+	p = xdr_decode_time(p, &fattr->ctime);
+	fattr->valid |= NFS_ATTR_FATTR;
+	if (fattr->type == NFCHR && fattr->rdev == NFS2_FIFO_DEV) {
+		fattr->type = NFFIFO;
+		fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO;
+		fattr->rdev = 0;
+	}
 	return p;
 }
 
+#define SATTR(p, attr, flag, field) \
+        *p++ = (attr->ia_valid & flag) ? htonl(attr->field) : ~(u32) 0
 static inline u32 *
-xdr_encode_sattr(u32 *p, struct nfs_sattr *sattr)
+xdr_encode_sattr(u32 *p, struct iattr *attr)
 {
-	*p++ = htonl(sattr->mode);
-	*p++ = htonl(sattr->uid);
-	*p++ = htonl(sattr->gid);
-	*p++ = htonl(sattr->size);
-	*p++ = htonl(sattr->atime.seconds);
-	*p++ = htonl(sattr->atime.useconds);
-	*p++ = htonl(sattr->mtime.seconds);
-	*p++ = htonl(sattr->mtime.useconds);
-	return p;
+	SATTR(p, attr, ATTR_MODE, ia_mode);
+	SATTR(p, attr, ATTR_UID, ia_uid);
+	SATTR(p, attr, ATTR_GID, ia_gid);
+	SATTR(p, attr, ATTR_SIZE, ia_size);
+
+	if (attr->ia_valid & (ATTR_ATIME|ATTR_ATIME_SET)) {
+		*p++ = htonl(attr->ia_atime);
+		*p++ = 0;
+	} else {
+		*p++ = ~(u32) 0;
+		*p++ = ~(u32) 0;
+	}
+
+	if (attr->ia_valid & (ATTR_MTIME|ATTR_MTIME_SET)) {
+		*p++ = htonl(attr->ia_mtime);
+		*p++ = 0;
+	} else {
+		*p++ = ~(u32) 0;	
+		*p++ = ~(u32) 0;
+	}
+  	return p;
 }
+#undef SATTR
 
 /*
  * NFS encode functions
@@ -176,7 +210,7 @@
 nfs_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs_diropargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fh);
-	p = xdr_encode_string(p, args->name);
+	p = xdr_encode_string(p, args->name, args->len);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
 	return 0;
 }
@@ -190,7 +224,8 @@
 nfs_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args)
 {
 	struct rpc_auth	*auth = req->rq_task->tk_auth;
-	int		replen, buflen;
+	int		buflen, replen;
+	unsigned int	nr;
 
 	p = xdr_encode_fhandle(p, args->fh);
 	*p++ = htonl(args->offset);
@@ -198,21 +233,25 @@
 	*p++ = htonl(args->count);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
 
-#if 1
+	/* Get the number of buffers in the receive iovec */
+        nr = args->nriov;
+
+        if (nr+2 > MAX_IOVEC) {
+                printk(KERN_ERR "NFS: Bad number of iov's in xdr_readargs\n");
+                return -EINVAL;
+        }
+
 	/* set up reply iovec */
 	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2;
 	buflen = req->rq_rvec[0].iov_len;
 	req->rq_rvec[0].iov_len  = replen;
-	req->rq_rvec[1].iov_base = args->buffer;
-	req->rq_rvec[1].iov_len  = args->count;
-	req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen;
-	req->rq_rvec[2].iov_len  = buflen - replen;
+        /* Copy the iovec */
+        memcpy(req->rq_rvec + 1, args->iov, nr * sizeof(struct iovec));
+
+	req->rq_rvec[nr+1].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen;
+	req->rq_rvec[nr+1].iov_len  = buflen - replen;
 	req->rq_rlen = args->count + buflen;
-	req->rq_rnr = 3;
-#else
-	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2;
-	req->rq_rvec[0].iov_len  = replen;
-#endif
+	req->rq_rnr += nr+1;
 
 	return 0;
 }
@@ -226,7 +265,6 @@
 	struct iovec *iov = req->rq_rvec;
 	int	status, count, recvd, hdrlen;
 
-	dprintk("RPC:      readres OK status %lx\n", (long)ntohl(*p));
 	if ((status = ntohl(*p++)))
 		return -nfs_stat_to_errno(status);
 	p = xdr_decode_fattr(p, res->fattr);
@@ -234,22 +272,26 @@
 	count = ntohl(*p++);
 	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
 	recvd = req->rq_rlen - hdrlen;
-	if (p != iov[2].iov_base) {
+	if (p != iov[req->rq_rnr-1].iov_base) {
 		/* Unexpected reply header size. Punt.
 		 * XXX: Move iovec contents to align data on page
 		 * boundary and adjust RPC header size guess */
-		printk("NFS: Odd RPC header size in read reply: %d\n", hdrlen);
+		printk(KERN_WARNING "NFS: Odd RPC header size in read reply: %d\n", hdrlen);
 		return -errno_NFSERR_IO;
 	}
 	if (count > recvd) {
-		printk("NFS: server cheating in read reply: "
+		printk(KERN_WARNING "NFS: server cheating in read reply: "
 			"count %d > recvd %d\n", count, recvd);
 		count = recvd;
 	}
 
 	dprintk("RPC:      readres OK count %d\n", count);
-	if (count < res->count)
-		memset((u8 *)(iov[1].iov_base+count), 0, res->count-count);
+	if (count < res->count) {
+		xdr_zero_iovec(iov+1, req->rq_rnr-2, res->count - count);
+		res->count = count;
+		res->eof = 1;  /* Silly NFSv3ism which can't be helped */
+	} else
+		res->eof = 0;
 
 	return count;
 }
@@ -261,6 +303,7 @@
 static int
 nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
 {
+	unsigned int nr;
 	u32 count = args->count;
 
 	p = xdr_encode_fhandle(p, args->fh);
@@ -270,10 +313,19 @@
 	*p++ = htonl(count);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
 
-	req->rq_svec[1].iov_base = (void *) args->buffer;
-	req->rq_svec[1].iov_len = count;
+	/* Get the number of buffers in the send iovec */
+	nr = args->nriov;
+
+	if (nr+2 > MAX_IOVEC) {
+                printk(KERN_ERR "NFS: Bad number of iov's in xdr_writeargs "
+                        "(nr %d max %d)\n", nr, MAX_IOVEC);
+                return -EINVAL;
+        }
+
+	/* Copy the iovec */
+        memcpy(req->rq_svec + 1, args->iov, nr * sizeof(struct iovec));
 	req->rq_slen += count;
-	req->rq_snr = 2;
+	req->rq_snr += nr;
 
 #ifdef NFS_PAD_WRITES
 	/*
@@ -286,10 +338,10 @@
 printk("nfs_writeargs: padding write, len=%d, slen=%d, pad=%d\n",
 req->rq_svec[1].iov_len, req->rq_slen, count);
 #endif
-		req->rq_svec[2].iov_base = (void *) "\0\0\0";
-		req->rq_svec[2].iov_len  = count;
+		req->rq_svec[req->rq_snr].iov_base = (void *) "\0\0\0";
+		req->rq_svec[req->rq_snr].iov_len  = count;
 		req->rq_slen += count;
-		req->rq_snr = 3;
+		req->rq_snr++;
 	}
 #endif
 
@@ -304,7 +356,7 @@
 nfs_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs_createargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fh);
-	p = xdr_encode_string(p, args->name);
+	p = xdr_encode_string(p, args->name, args->len);
 	p = xdr_encode_sattr(p, args->sattr);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
 	return 0;
@@ -317,9 +369,9 @@
 nfs_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs_renameargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fromfh);
-	p = xdr_encode_string(p, args->fromname);
+	p = xdr_encode_string(p, args->fromname, args->fromlen);
 	p = xdr_encode_fhandle(p, args->tofh);
-	p = xdr_encode_string(p, args->toname);
+	p = xdr_encode_string(p, args->toname, args->tolen);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
 	return 0;
 }
@@ -332,7 +384,7 @@
 {
 	p = xdr_encode_fhandle(p, args->fromfh);
 	p = xdr_encode_fhandle(p, args->tofh);
-	p = xdr_encode_string(p, args->toname);
+	p = xdr_encode_string(p, args->toname, args->tolen);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
 	return 0;
 }
@@ -344,8 +396,8 @@
 nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fromfh);
-	p = xdr_encode_string(p, args->fromname);
-	p = xdr_encode_string(p, args->topath);
+	p = xdr_encode_string(p, args->fromname, args->fromlen);
+	p = xdr_encode_string(p, args->topath, args->tolen);
 	p = xdr_encode_sattr(p, args->sattr);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
 	return 0;
@@ -360,7 +412,7 @@
 	struct rpc_task	*task = req->rq_task;
 	struct rpc_auth	*auth = task->tk_auth;
 	u32		bufsiz = args->bufsiz;
-	int		replen;
+	int		buflen, replen;
 
 	/*
 	 * Some servers (e.g. HP OS 9.5) seem to expect the buffer size
@@ -376,51 +428,37 @@
 
 	/* set up reply iovec */
 	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2;
-	/*
-	dprintk("RPC: readdirargs: slack is 4 * (%d + %d + %d) = %d\n",
-		RPC_REPHDRSIZE, auth->au_rslack, NFS_readdirres_sz, replen);
-	 */
+	buflen = req->rq_rvec[0].iov_len;
 	req->rq_rvec[0].iov_len  = replen;
 	req->rq_rvec[1].iov_base = args->buffer;
 	req->rq_rvec[1].iov_len  = args->bufsiz;
-	req->rq_rlen = replen + args->bufsiz;
-	req->rq_rnr = 2;
-
-	/*
-	dprintk("RPC:      readdirargs set up reply vec:\n");
-	dprintk("          rvec[0] = %p/%d\n",
-			req->rq_rvec[0].iov_base,
-			req->rq_rvec[0].iov_len);
-	dprintk("          rvec[1] = %p/%d\n",
-			req->rq_rvec[1].iov_base,
-			req->rq_rvec[1].iov_len);
-	 */
+	req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen;
+	req->rq_rvec[2].iov_len  = buflen - replen;
+	req->rq_rlen = buflen + args->bufsiz;
+	req->rq_rnr += 2;
 
 	return 0;
 }
 
 /*
- * Decode the result of a readdir call. We decode the result in place
- * to avoid a malloc of NFS_MAXNAMLEN+1 for each file name.
- * After decoding, the layout in memory looks like this:
- *	entry1 entry2 ... entryN <space> stringN ... string2 string1
- * Each entry consists of three __u32 values, the same space as NFS uses.
- * Note that the strings are not null-terminated so that the entire number
- * of entries returned by the server should fit into the buffer.
+ * Decode the result of a readdir call.
+ * We're not really decoding anymore, we just leave the buffer untouched
+ * and only check that it is syntactically correct.
+ * The real decoding happens in nfs_decode_dirent below, called directly
+ * from nfs_readdir for each entry.
  */
 static int
 nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res)
 {
 	struct iovec		*iov = req->rq_rvec;
 	int			 status, nr;
-	char			*string, *start;
-	u32			*end, *entry, len, fileid, cookie;
+	u32			*end, *entry, len;
 
 	if ((status = ntohl(*p++)))
 		return -nfs_stat_to_errno(status);
 	if ((void *) p != ((u8 *) iov->iov_base+iov->iov_len)) {
 		/* Unexpected reply header size. Punt. */
-		printk("NFS: Odd RPC header size in readdirres reply\n");
+		printk(KERN_WARNING "NFS: Odd RPC header size in readdirres reply\n");
 		return -errno_NFSERR_IO;
 	}
 
@@ -429,66 +467,58 @@
 	end = (u32 *) ((u8 *) p + iov[1].iov_len);
 
 	/* Get start and end of dirent buffer */
-	entry  = (u32 *) res->buffer;
-	start  = (char *) res->buffer;
-	string = (char *) res->buffer + res->bufsiz;
-	for (nr = 0; *p++; nr++) {
-		fileid = ntohl(*p++);
+	if (res->buffer != p) {
+		printk(KERN_ERR "NFS: Bad result buffer in readdir\n");
+		return -errno_NFSERR_IO;
+	}
 
+	for (nr = 0; *p++; nr++) {
+		entry = p - 1;
+		p++; /* fileid */
 		len = ntohl(*p++);
-		/*
-		 * Check whether the server has exceeded our reply buffer,
-		 * and set a flag to convert the size to longwords.
-		 */
-		if ((p + QUADLEN(len) + 3) > end) {
-			struct rpc_clnt *clnt = req->rq_task->tk_client;
-			printk(KERN_WARNING
-				"NFS: server %s, readdir reply truncated\n",
-				clnt->cl_server);
-			printk(KERN_WARNING "NFS: nr=%d, slots=%d, len=%d\n",
-				nr, (end - p), len);
-			clnt->cl_flags |= NFS_CLNTF_BUFSIZE;
-			break;
-		}
-		if (len > NFS_MAXNAMLEN) {
-			printk("NFS: giant filename in readdir (len %x)!\n",
+		p += XDR_QUADLEN(len) + 1;	/* name plus cookie */
+		if (len > NFS2_MAXNAMLEN) {
+			printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)!\n",
 						len);
 			return -errno_NFSERR_IO;
 		}
-		string -= len;
-		if ((void *) (entry+3) > (void *) string) {
-			/* 
-			 * This error is impossible as long as the temp
-			 * buffer is no larger than the user buffer. The 
-			 * current packing algorithm uses the same amount
-			 * of space in the user buffer as in the XDR data,
-			 * so it's guaranteed to fit.
-			 */
-			printk("NFS: incorrect buffer size in %s!\n",
-				__FUNCTION__);
+		if (p + 2 > end) {
+			printk(KERN_NOTICE
+				"NFS: short packet in readdir reply!\n");
+			entry[0] = entry[1] = 0;
 			break;
 		}
+	}
+	p++; /* EOF flag */
 
-		memmove(string, p, len);
-		p += QUADLEN(len);
-		cookie = ntohl(*p++);
-		/*
-		 * To make everything fit, we encode the length, offset,
-		 * and eof flag into 32 bits. This works for filenames
-		 * up to 32K and PAGE_SIZE up to 64K.
-		 */
-		status = !p[0] && p[1] ? (1 << 15) : 0; /* eof flag */
-		*entry++ = fileid;
-		*entry++ = cookie;
-		*entry++ = ((string - start) << 16) | status | (len & 0x7FFF);
-	}
-#ifdef NFS_PARANOIA
-printk("nfs_xdr_readdirres: %d entries, ent sp=%d, str sp=%d\n",
-nr, ((char *) entry - start), (start + res->bufsiz - string));
-#endif
+	if (p > end) {
+		printk(KERN_NOTICE
+			"NFS: short packet in readdir reply!\n");
+		return -errno_NFSERR_IO;
+	}
 	return nr;
 }
 
+u32 *
+nfs_decode_dirent(u32 *p, struct nfs_entry *entry, int plus)	/* 'plus' is not used in this function */
+{	/* Decode one readdir entry at p into *entry; returns the next word to decode or an ERR_PTR */
+	if (!*p++) {			/* zero marker word: no further entry in this buffer */
+		if (!*p)		/* word after the marker (the EOF flag) is clear */
+			return ERR_PTR(-EAGAIN);
+		entry->eof = 1;		/* EOF flag set: end of directory */
+		return ERR_PTR(-EBADCOOKIE);
+	}
+
+	entry->ino	  = ntohl(*p++);
+	entry->len	  = ntohl(*p++);
+	entry->name	  = (const char *) p;	/* points into the XDR buffer; nothing is copied */
+	p		 += XDR_QUADLEN(entry->len);	/* step over the name */
+	entry->cookie	  = ntohl(*p++);
+	entry->eof	  = !p[0] && p[1];	/* last entry: zero marker follows, then a non-zero EOF flag */
+
+	return p;
+}
+
 /*
  * NFS XDR decode functions
  */
@@ -523,12 +553,9 @@
 {
 	int	status;
 
-	dprintk("RPC:      attrstat status %lx\n", (long)ntohl(*p));
 	if ((status = ntohl(*p++)))
 		return -nfs_stat_to_errno(status);
 	xdr_decode_fattr(p, fattr);
-	dprintk("RPC:      attrstat OK type %d mode %o dev %x ino %x\n",
-		fattr->type, fattr->mode, fattr->fsid, fattr->fileid);
 	return 0;
 }
 
@@ -541,14 +568,34 @@
 {
 	int	status;
 
-	dprintk("RPC:      diropres status %lx\n", (long)ntohl(*p));
 	if ((status = ntohl(*p++)))
 		return -nfs_stat_to_errno(status);
 	p = xdr_decode_fhandle(p, res->fh);
 	xdr_decode_fattr(p, res->fattr);
-	dprintk("RPC:      diropres OK type %x mode %o dev %x ino %x\n",
-		res->fattr->type, res->fattr->mode,
-		res->fattr->fsid, res->fattr->fileid);
+	return 0;
+}
+
+/*
+ * Encode READLINK args
+ */
+static int
+nfs_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_readlinkargs *args)
+{
+	struct rpc_task *task = req->rq_task;
+	struct rpc_auth *auth = task->tk_auth;
+	int		buflen, replen;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readlinkres_sz) << 2;
+	buflen = req->rq_rvec[0].iov_len;
+	req->rq_rvec[0].iov_len  = replen;
+	req->rq_rvec[1].iov_base = args->buffer;
+	req->rq_rvec[1].iov_len  = args->bufsiz;
+	req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen;
+	req->rq_rvec[2].iov_len  = buflen - replen;
+	req->rq_rlen = buflen + args->bufsiz;
+	req->rq_rnr += 2;
 	return 0;
 }
 
@@ -558,33 +605,70 @@
 static int
 nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs_readlinkres *res)
 {
+	u32	*strlen;
+	char	*string;
 	int	status;
+	unsigned int len;
 
 	if ((status = ntohl(*p++)))
 		return -nfs_stat_to_errno(status);
-	xdr_decode_string2(p, res->string, res->lenp, res->maxlen);
-
-	/* Caller takes over the buffer here to avoid extra copy */
-	res->buffer = req->rq_task->tk_buffer;
-	req->rq_task->tk_buffer = NULL;
+	strlen = (u32*)res->buffer;
+	/* Convert length of symlink */
+	len = ntohl(*strlen);
+	if (len > res->bufsiz - 5)
+		len = res->bufsiz - 5;
+	*strlen = len;
+	/* NULL terminate the string we got */
+	string = (char *)(strlen + 1);
+	string[len] = 0;
 	return 0;
 }
 
 /*
+ * Decode WRITE reply
+ */
+static int
+nfs_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
+{
+	res->verf->committed = NFS_FILE_SYNC;	/* set unconditionally; nothing is read from the reply for this */
+	return nfs_xdr_attrstat(req, p, res->fattr);	/* the reply body itself is decoded as an attrstat */
+}
+
+/*
  * Decode STATFS reply
  */
 static int
 nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
 {
 	int	status;
+	u32	xfer_size;
 
 	if ((status = ntohl(*p++)))
 		return -nfs_stat_to_errno(status);
-	res->tsize = ntohl(*p++);
-	res->bsize = ntohl(*p++);
-	res->blocks = ntohl(*p++);
-	res->bfree = ntohl(*p++);
-	res->bavail = ntohl(*p++);
+
+	/* For NFSv2, we more or less have to guess the preferred
+	 * read/write/readdir sizes from the single 'transfer size'
+	 * value.
+	 */
+	xfer_size = ntohl(*p++);	/* tsize */
+	res->rtmax  = 8 * 1024;
+	res->rtpref = xfer_size;
+	res->rtmult = xfer_size;
+	res->wtmax  = 8 * 1024;
+	res->wtpref = xfer_size;
+	res->wtmult = xfer_size;
+	res->dtpref = PAGE_CACHE_SIZE;
+	res->maxfilesize = 0x7FFFFFFF;	/* just a guess */
+	res->bsize  = ntohl(*p++);
+
+	res->tbytes = ntohl(*p++) * res->bsize;
+	res->fbytes = ntohl(*p++) * res->bsize;
+	res->abytes = ntohl(*p++) * res->bsize;
+	res->tfiles = 0;
+	res->ffiles = 0;
+	res->afiles = 0;
+	res->namelen = 0;
+
 	return 0;
 }
 
@@ -601,7 +685,7 @@
 	{ NFSERR_NOENT,		ENOENT		},
 	{ NFSERR_IO,		errno_NFSERR_IO	},
 	{ NFSERR_NXIO,		ENXIO		},
-	{ NFSERR_EAGAIN,	EAGAIN		},
+/*	{ NFSERR_EAGAIN,	EAGAIN		}, */
 	{ NFSERR_ACCES,		EACCES		},
 	{ NFSERR_EXIST,		EEXIST		},
 	{ NFSERR_XDEV,		EXDEV		},
@@ -612,18 +696,31 @@
 	{ NFSERR_FBIG,		EFBIG		},
 	{ NFSERR_NOSPC,		ENOSPC		},
 	{ NFSERR_ROFS,		EROFS		},
-	{ NFSERR_OPNOTSUPP,	EOPNOTSUPP	},
+	{ NFSERR_MLINK,		EMLINK		},
 	{ NFSERR_NAMETOOLONG,	ENAMETOOLONG	},
 	{ NFSERR_NOTEMPTY,	ENOTEMPTY	},
 	{ NFSERR_DQUOT,		EDQUOT		},
 	{ NFSERR_STALE,		ESTALE		},
+	{ NFSERR_REMOTE,	EREMOTE		},
 #ifdef EWFLUSH
 	{ NFSERR_WFLUSH,	EWFLUSH		},
 #endif
+	{ NFSERR_BADHANDLE,	EBADHANDLE	},
+	{ NFSERR_NOT_SYNC,	ENOTSYNC	},
+	{ NFSERR_BAD_COOKIE,	EBADCOOKIE	},
+	{ NFSERR_NOTSUPP,	ENOTSUPP	},
+	{ NFSERR_TOOSMALL,	ETOOSMALL	},
+	{ NFSERR_SERVERFAULT,	ESERVERFAULT	},
+	{ NFSERR_BADTYPE,	EBADTYPE	},
+	{ NFSERR_JUKEBOX,	EJUKEBOX	},
 	{ -1,			EIO		}
 };
 
-static int
+/*
+ * Convert an NFS error code to a local one.
+ * This one is used jointly by NFSv2 and NFSv3.
+ */
+int
 nfs_stat_to_errno(int stat)
 {
 	int i;
@@ -632,7 +729,7 @@
 		if (nfs_errtbl[i].stat == stat)
 			return nfs_errtbl[i].errno;
 	}
-	printk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat);
+	printk(KERN_ERR "nfs_stat_to_errno: bad nfs status return value: %d\n", stat);
 	return nfs_errtbl[i].errno;
 }
 
@@ -644,7 +741,8 @@
     { "nfs_" #proc,					\
       (kxdrproc_t) nfs_xdr_##argtype,			\
       (kxdrproc_t) nfs_xdr_##restype,			\
-      MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2	\
+      MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2,	\
+      0							\
     }
 
 static struct rpc_procinfo	nfs_procedures[18] = {
@@ -653,10 +751,10 @@
     PROC(setattr,	sattrargs,	attrstat),
     PROC(root,		enc_void,	dec_void),
     PROC(lookup,	diropargs,	diropres),
-    PROC(readlink,	fhandle,	readlinkres),
+    PROC(readlink,	readlinkargs,	readlinkres),
     PROC(read,		readargs,	readres),
     PROC(writecache,	enc_void,	dec_void),
-    PROC(write,		writeargs,	attrstat),
+    PROC(write,		writeargs,	writeres),
     PROC(create,	createargs,	diropres),
     PROC(remove,	diropargs,	stat),
     PROC(rename,	renameargs,	stat),
@@ -668,22 +766,8 @@
     PROC(statfs,	fhandle,	statfsres),
 };
 
-static struct rpc_version	nfs_version2 = {
+struct rpc_version		nfs_version2 = {
 	2,
 	sizeof(nfs_procedures)/sizeof(nfs_procedures[0]),
 	nfs_procedures
-};
-
-static struct rpc_version *	nfs_version[] = {
-	NULL,
-	NULL,
-	&nfs_version2
-};
-
-struct rpc_program	nfs_program = {
-	"nfs",
-	NFS_PROGRAM,
-	sizeof(nfs_version) / sizeof(nfs_version[0]),
-	nfs_version,
-	&nfs_rpcstat,
 };
--- linux/fs/nfs/nfs3proc.c.nfsattack-gafton	Fri Feb  4 23:26:34 2000
+++ linux/fs/nfs/nfs3proc.c	Fri Feb  4 23:26:34 2000
@@ -0,0 +1,466 @@
+/*
+ *  linux/fs/nfs/nfs3proc.c
+ *
+ *  Client-side NFSv3 procedure stubs.
+ *
+ *  Copyright (C) 1997, Olaf Kirch
+ */
+
+#define NFS_NEED_NFS3_XDR_TYPES
+
+#include <linux/param.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/malloc.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/pagemap.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs3.h>
+#include <linux/nfs_fs.h>
+
+#include <asm/segment.h>
+
+#define NFSDBG_FACILITY		NFSDBG_PROC
+
+/*
+ * Bare-bones access to getattr: this is for nfs_read_super.
+ */
+static int
+nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+				struct nfs_fattr *fattr)
+{
+	int	status;
+
+	dprintk("NFS call  getroot\n");
+	fattr->valid = 0;
+	status = rpc_call(server->client, NFS3PROC_GETATTR, fhandle, fattr, 0);
+	dprintk("NFS reply getroot\n");
+	return status;
+}
+
+/*
+ * One function for each procedure in the NFS protocol.
+ */
+static int
+nfs3_proc_getattr(struct dentry *dentry, struct nfs_fattr *fattr)
+{
+	int	status;
+
+	dprintk("NFS call  getattr\n");
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dentry->d_inode), NFS3PROC_GETATTR,
+			  NFS_FH(dentry), fattr, 0);
+	dprintk("NFS reply getattr\n");
+	return status;
+}
+
+static int
+nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
+			struct iattr *sattr)
+{
+	struct nfs3_sattrargs	arg = { NFS_FH(dentry), sattr, 0, 0 };
+	int	status;
+
+	dprintk("NFS call  setattr\n");
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dentry->d_inode), NFS3PROC_SETATTR, &arg, fattr, 0);
+	dprintk("NFS reply setattr\n");
+	return status;
+}
+
+static int	/* LOOKUP 'name' in 'dir'; results land in *dir_attr, *fhandle and *fattr */
+nfs3_proc_lookup(struct dentry *dir, struct nfs_fattr *dir_attr,
+			struct qstr *name,
+			struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs3_diropargs	arg = { NFS_FH(dir), name->name, name->len };
+	struct nfs3_diropres	res = { dir_attr, fhandle, fattr };
+	int			status;
+
+	dprintk("NFS call  lookup %s\n", name->name);
+	dir_attr->valid = 0;	/* cleared so stale flags cannot survive the call */
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dir->d_inode), NFS3PROC_LOOKUP, &arg, &res, 0);
+	if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR))	/* lookup worked but left fattr unfilled */
+		status = rpc_call(NFS_CLIENT(dir->d_inode), NFS3PROC_GETATTR,
+			 fhandle, fattr, 0);	/* fetch the attributes with a separate GETATTR */
+	dprintk("NFS reply lookup: %d\n", status);
+	return status;
+}
+
+static int	/* ask the server via ACCESS whether 'mode' (MAY_READ/MAY_WRITE) is permitted on dentry */
+nfs3_proc_access(struct dentry *dentry, int mode, struct nfs_fattr *fattr)
+{
+	struct nfs3_accessargs	arg = { NFS_FH(dentry), 0 };
+	struct nfs3_accessres	res = { fattr, 0 };
+	int	status;
+
+	dprintk("NFS call  access\n");
+	fattr->valid = 0;
+	if (mode & MAY_READ)
+		arg.access |= NFS3_ACCESS_READ;
+	if (mode & MAY_WRITE)	/* only READ and WRITE are translated; other mode bits (e.g. MAY_EXEC) are ignored */
+		arg.access |= NFS3_ACCESS_MODIFY;
+	status = rpc_call(NFS_CLIENT(dentry->d_inode), NFS3PROC_ACCESS, &arg, &res, 0);
+	dprintk("NFS reply access\n");
+
+	if (status == 0 && (arg.access & res.access) != arg.access)	/* some requested bit was not granted */
+		status = -EPERM;	/* NOTE(review): -EPERM rather than -EACCES; confirm this is what callers expect */
+	return status;
+}
+
+static int
+nfs3_proc_readlink(struct dentry *dentry, struct nfs_fattr *fattr,
+			void *buffer, unsigned int buflen)
+{
+	struct nfs3_readlinkargs args = { NFS_FH(dentry), buffer, buflen };
+	struct nfs3_readlinkres	res = { fattr, buffer, buflen };
+	int			status;
+
+	dprintk("NFS call  readlink\n");
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dentry->d_inode), NFS3PROC_READLINK,
+					&args, &res, 0);
+	dprintk("NFS reply readlink: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_read(struct dentry *dentry, struct nfs_fattr *fattr,
+	       struct rpc_cred *cred, int flags,
+	       unsigned long offset, unsigned int count,
+	       void *buffer, int *eofp)
+{
+	struct nfs_readargs	arg = { NFS_FH(dentry), offset, count, 1,
+					{{buffer, count}, {0,0}, {0,0}, {0,0},
+					 {0,0}, {0,0}, {0,0}, {0,0}} };
+	struct nfs_readres	res = { fattr, count, 0 };
+	struct rpc_message	msg = { NFS3PROC_READ, &arg, &res, cred};
+	int			status;
+
+	dprintk("NFS call  read %d @ %ld\n", count, offset);
+	fattr->valid = 0;
+	status = rpc_call_sync(NFS_CLIENT(dentry->d_inode), &msg, flags);
+	dprintk("NFS reply read: %d\n", status);
+	*eofp = res.eof;
+	return status;
+}
+
+static int	/* synchronous WRITE of 'count' bytes from 'buffer' at 'offset'; returns res.count or a negative status */
+nfs3_proc_write(struct dentry *dentry, struct nfs_fattr *fattr,
+		struct rpc_cred *cred, int flags,
+		unsigned long offset, unsigned int count,
+		void *buffer, struct nfs_writeverf *verf)
+{
+	struct nfs_writeargs	arg = { NFS_FH(dentry), offset, count, 1, 1,
+					{{buffer, count}, {0,0}, {0,0}, {0,0},
+					 {0,0}, {0,0}, {0,0}, {0,0}} };
+	struct nfs_writeres	res = { fattr, verf, 0 };
+	struct rpc_message	msg = { NFS3PROC_WRITE, &arg, &res, cred};
+	int			status, rpcflags = 0;
+
+	dprintk("NFS call  write %d @ %ld\n", count, offset);
+	fattr->valid = 0;
+	if (flags & NFS_RW_SWAP)	/* swap I/O gets its own RPC flags */
+		rpcflags |= NFS_RPC_SWAPFLAGS;
+	arg.stable = (flags & NFS_RW_SYNC) ? NFS_FILE_SYNC : NFS_UNSTABLE;	/* overrides the initializer above */
+
+	status = rpc_call_sync(NFS_CLIENT(dentry->d_inode), &msg, rpcflags);
+
+	dprintk("NFS reply write: %d\n", status);	/* message used to say "read" (copy-paste slip) */
+	return status < 0? status : res.count;
+}
+
+/*
+ * Create a regular file.  O_EXCL asks for an exclusive create; on -ENOTSUPP
+ * the mode is degraded (exclusive -> guarded -> unchecked) and retried.
+ */
+static int
+nfs3_proc_create(struct dentry *dir, struct nfs_fattr *dir_attr,
+			struct qstr *name, struct iattr *sattr, int flags,
+			struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs3_createargs	arg = { NFS_FH(dir), name->name, name->len,
+					sattr, 0, { 0, 0 } };
+	struct nfs3_diropres	res = { dir_attr, fhandle, fattr };
+	int			status;
+
+	dprintk("NFS call  create %s\n", name->name);
+	dir_attr->valid = 0;
+	fattr->valid = 0;
+
+	arg.createmode = NFS3_CREATE_UNCHECKED;
+	if (flags & O_EXCL) {
+		arg.createmode  = NFS3_CREATE_EXCLUSIVE;
+		arg.verifier[0] = jiffies;	/* verifier is built from the tick count and the caller's pid */
+		arg.verifier[1] = current->pid;
+	}
+
+again:
+	status = rpc_call(NFS_CLIENT(dir->d_inode), NFS3PROC_CREATE, &arg, &res, 0);
+
+	/* If the server doesn't support the exclusive creation semantics,
+	 * try again with simple 'guarded' mode. */
+	if (status == -ENOTSUPP) {	/* NFS errors come back as negative errno (see nfs_stat_to_errno) */
+		switch (arg.createmode) {
+			case NFS3_CREATE_EXCLUSIVE:
+				arg.createmode = NFS3_CREATE_GUARDED;
+				break;
+
+			case NFS3_CREATE_GUARDED:
+				arg.createmode = NFS3_CREATE_UNCHECKED;
+				break;
+
+			default:	/* already unchecked: nothing weaker left to try */
+				goto exit;
+		}
+		goto again;
+	}
+
+exit:
+	dprintk("NFS reply create: %d\n", status);
+
+	/* When we created the file with exclusive semantics, make
+	 * sure we set the attributes afterwards. */
+	if (status == 0 && arg.createmode == NFS3_CREATE_EXCLUSIVE) {
+		struct nfs3_sattrargs	arg = { fhandle, sattr, 0, 0 };	/* shadows the outer 'arg' in this scope */
+		dprintk("NFS call  setattr (post-create)\n");
+
+		/* Note: we could use a guarded setattr here, but I'm
+		 * not sure this buys us anything (and I'd have
+		 * to revamp the NFSv3 XDR code) */
+		fattr->valid = 0;
+		status = rpc_call(NFS_CLIENT(dir->d_inode), NFS3PROC_SETATTR,
+						&arg, fattr, 0);
+		dprintk("NFS reply setattr (post-create): %d\n", status);
+	}
+
+	return status;
+}
+
+static int
+nfs3_proc_remove(struct dentry *dir, struct nfs_fattr *dir_attr,
+				struct qstr *name, struct rpc_cred *cred)
+{
+	struct nfs3_diropargs	arg = { NFS_FH(dir), name->name, name->len };
+	struct rpc_message	msg = {NFS3PROC_REMOVE, &arg, dir_attr, cred };
+	int			status;
+
+	dprintk("NFS call  remove %s\n", name->name);
+	dir_attr->valid = 0;
+	status = rpc_call_sync(NFS_CLIENT(dir->d_inode), &msg, 0);
+	dprintk("NFS reply remove: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_rename(struct dentry *old_dir, struct nfs_fattr *old_attr,
+			struct qstr *old_name,
+			struct dentry *new_dir, struct nfs_fattr *new_attr,
+			struct qstr *new_name)
+{
+	struct nfs3_renameargs	arg = { NFS_FH(old_dir),
+					old_name->name, old_name->len,
+					NFS_FH(new_dir),
+					new_name->name, new_name->len };
+	struct nfs3_renameres	res = { old_attr, new_attr };
+	int			status;
+
+	dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
+	old_attr->valid = 0;
+	new_attr->valid = 0;
+	status = rpc_call(NFS_CLIENT(old_dir->d_inode), NFS3PROC_RENAME, &arg, &res, 0);
+	dprintk("NFS reply rename: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_link(struct dentry *dentry, struct nfs_fattr *fattr,
+			struct dentry *dir, struct nfs_fattr *dir_attr,
+			struct qstr *name)
+{
+	struct nfs3_linkargs	arg = { NFS_FH(dentry), NFS_FH(dir),
+					name->name, name->len };
+	struct nfs3_linkres	res = { dir_attr, fattr };
+	int			status;
+
+	dprintk("NFS call  link %s\n", name->name);
+	dir_attr->valid = 0;
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dentry->d_inode), NFS3PROC_LINK, &arg, &res, 0);
+	dprintk("NFS reply link: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_symlink(struct dentry *dir, struct nfs_fattr *dir_attr,
+			struct qstr *name, struct qstr *path,
+			struct iattr *sattr,
+			struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs3_symlinkargs	arg = { NFS_FH(dir), name->name, name->len,
+					path->name, path->len, sattr };
+	struct nfs3_diropres	res = { dir_attr, fhandle, fattr };
+	int			status;
+
+	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
+	dir_attr->valid = 0;
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dir->d_inode), NFS3PROC_SYMLINK, &arg, &res, 0);
+	dprintk("NFS reply symlink: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_mkdir(struct dentry *dir, struct nfs_fattr *dir_attr,
+			struct qstr *name, struct iattr *sattr,
+			struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs3_createargs	arg = { NFS_FH(dir), name->name, name->len,
+					sattr, 0, { 0, 0 } };
+	struct nfs3_diropres	res = { dir_attr, fhandle, fattr };
+	int			status;
+
+	dprintk("NFS call  mkdir %s\n", name->name);
+	dir_attr->valid = 0;
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dir->d_inode), NFS3PROC_MKDIR, &arg, &res, 0);
+	dprintk("NFS reply mkdir: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_rmdir(struct dentry *dir, struct nfs_fattr *dir_attr,
+				struct qstr *name)
+{
+	struct nfs3_diropargs	arg = { NFS_FH(dir), name->name, name->len };
+	int			status;
+
+	dprintk("NFS call  rmdir %s\n", name->name);
+	dir_attr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dir->d_inode), NFS3PROC_RMDIR, &arg, dir_attr, 0);
+	dprintk("NFS reply rmdir: %d\n", status);
+	return status;
+}
+
+/*
+ * The READDIR implementation is somewhat hackish - we pass the user buffer
+ * to the encode function, which installs it in the receive iovec.
+ * The decode function itself doesn't perform any decoding, it just makes
+ * sure the reply is syntactically correct.
+ *
+ * Also note that this implementation handles both plain readdir and
+ * readdirplus.
+ */
+static int	/* READDIR, or READDIRPLUS when 'plus' is set, into 'entry' (size bytes); returns the call status */
+nfs3_proc_readdir(struct dentry *dir, struct nfs_fattr *dir_attr,
+		  struct rpc_cred *cred,
+		  u64 cookie, void *entry, unsigned int size, int plus)
+{
+	struct nfs3_readdirargs	arg = { NFS_FH(dir), cookie, {0, 0}, 0, 0, 0 };
+	struct nfs3_readdirres	res = { dir_attr, 0, 0, 0, 0 };
+	struct rpc_message	msg = { NFS3PROC_READDIR, &arg, &res, cred };
+	u32			*verf = NFS_COOKIEVERF(dir->d_inode);
+	int			status;
+
+	arg.buffer  = entry;	/* the same buffer is handed to both the argument and result structs */
+	arg.bufsiz  = size;
+	arg.verf[0] = verf[0];	/* send a copy of the inode's cookie verifier */
+	arg.verf[1] = verf[1];
+	arg.plus    = plus;
+	res.buffer  = entry;
+	res.bufsiz  = size;
+	res.verf    = verf;	/* result side points at the inode's verifier itself, not at a copy */
+	res.plus    = plus;
+
+	if (plus)
+		msg.proc = NFS3PROC_READDIRPLUS;
+
+	msg.arg = &arg;		/* redundant: msg was already initialised with these three values */
+	msg.res = &res;
+	msg.cred = cred;
+
+	dprintk("NFS call  readdir%s %d\n",
+			plus? "plus" : "", (unsigned int) cookie);	/* the u64 cookie is truncated for this debug print only */
+
+	dir_attr->valid = 0;
+	status = rpc_call_sync(NFS_CLIENT(dir->d_inode), &msg, 0);
+	dprintk("NFS reply readdir: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_mknod(struct dentry *dir, struct nfs_fattr *dir_attr,
+			struct qstr *name, struct iattr *sattr,
+			dev_t rdev, struct nfs_fh *fh, struct nfs_fattr *fattr)
+{
+	struct nfs3_mknodargs	arg = { NFS_FH(dir), name->name, name->len, 0,
+					sattr, rdev };
+	struct nfs3_diropres	res = { dir_attr, fh, fattr };
+	int			status;
+
+	switch (sattr->ia_mode & S_IFMT) {
+	case S_IFBLK:	arg.type = NF3BLK;  break;
+	case S_IFCHR:	arg.type = NF3CHR;  break;
+	case S_IFIFO:	arg.type = NF3FIFO; break;
+	case S_IFSOCK:	arg.type = NF3SOCK; break;
+	default:	return -EINVAL;
+	}
+
+	dprintk("NFS call  mknod %s %x\n", name->name, rdev);
+	dir_attr->valid = 0;
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dir->d_inode), NFS3PROC_MKNOD, &arg, &res, 0);
+	dprintk("NFS reply mknod: %d\n", status);
+	return status;
+}
+
+/*
+ * This is a combo call of fsstat and fsinfo
+ */
+static int
+nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
+			struct nfs_fsinfo *info)
+{
+	int	status;
+
+	dprintk("NFS call  fsstat\n");
+	memset((char *)info, 0, sizeof(*info));	/* start from an all-zero result */
+	status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, info, 0);
+	if (status < 0)		/* FSINFO is skipped when FSSTAT fails */
+		goto error;
+	status = rpc_call(server->client, NFS3PROC_FSINFO, fhandle, info, 0);	/* second call writes into the same *info */
+
+error:
+	dprintk("NFS reply statfs: %d\n", status);
+	return status;
+}
+
+struct nfs_rpc_ops	nfs_v3_clientops = {
+	3,			/* protocol version */
+	nfs3_proc_get_root,
+	nfs3_proc_getattr,
+	nfs3_proc_setattr,
+	nfs3_proc_lookup,
+	nfs3_proc_access,
+	nfs3_proc_readlink,
+	nfs3_proc_read,
+	nfs3_proc_write,
+	NULL,			/* commit */
+	nfs3_proc_create,
+	nfs3_proc_remove,
+	nfs3_proc_rename,
+	nfs3_proc_link,
+	nfs3_proc_symlink,
+	nfs3_proc_mkdir,
+	nfs3_proc_rmdir,
+	nfs3_proc_readdir,
+	nfs3_proc_mknod,
+	nfs3_proc_statfs,
+
+	nfs3_decode_dirent,
+};
--- linux/fs/nfs/nfs3xdr.c.nfsattack-gafton	Sun Dec 21 00:06:01 1997
+++ linux/fs/nfs/nfs3xdr.c	Fri Feb  4 23:26:34 2000
@@ -1,94 +1,152 @@
 /*
- * linux/fs/nfs/nfs2xdr.c
+ * linux/fs/nfs/nfs3xdr.c
  *
  * XDR functions to encode/decode NFSv3 RPC arguments and results.
- * Note: this is incomplete!
  *
- * Copyright (C) 1996 Olaf Kirch
+ * Copyright (C) 1996, 1997 Olaf Kirch
  */
 
-#define NFS_NEED_XDR_TYPES
+#define NFS_NEED_NFS3_XDR_TYPES
 
 #include <linux/param.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/malloc.h>
-#include <linux/nfs_fs.h>
 #include <linux/utsname.h>
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/in.h>
 #include <linux/pagemap.h>
 #include <linux/proc_fs.h>
+#include <linux/kdev_t.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs3.h>
+#include <linux/nfs_fs.h>
 
-#ifdef RPC_DEBUG
-# define RPC_FACILITY		RPCDBG_NFS
-#endif
+/* Pad WRITE data to a longword multiple for servers requiring it; remove this define to disable */
+#define NFS_PAD_WRITES		1
 
-#define QUADLEN(len)		(((len) + 3) >> 2)
-static int			nfs_stat_to_errno(int stat);
+#define NFSDBG_FACILITY		NFSDBG_XDR
 
 /* Mapping from NFS error code to "errno" error code. */
 #define errno_NFSERR_IO		EIO
 
+extern int			nfs_stat_to_errno(int);
+
 /*
  * Declare the space requirements for NFS arguments and replies as
  * number of 32bit-words
  */
-#define NFS_fhandle_sz		(1+16)
-#define NFS_sattr_sz		8
-#define NFS_filename_sz		1+(NFS_MAXNAMLEN>>2)
-#define NFS_path_sz		1+(NFS_MAXPATHLEN>>2)
-#define NFS_fattr_sz		17
-#define NFS_info_sz		5
+#define NFS_fhandle_sz		(1+16)
+#define NFS_fh_sz		NFS_fhandle_sz	/* shorthand */
+#define NFS_sattr_sz		15
+#define NFS_filename_sz		(1+(NFS3_MAXNAMLEN>>2))
+#define NFS_path_sz		(1+(NFS3_MAXPATHLEN>>2))
+#define NFS_fattr_sz		21
+#define NFS_wcc_attr_sz		6
+#define NFS_pre_op_attr_sz	(1+NFS_wcc_attr_sz)
+#define NFS_post_op_attr_sz	(1+NFS_fattr_sz)	/* parenthesized: multiplied by 2 in NFS_lookupres_sz */
+#define NFS_wcc_data_sz		(NFS_pre_op_attr_sz+NFS_post_op_attr_sz)	/* parenthesized: multiplied by 2 in NFS_renameres_sz */
+#define NFS_fsstat_sz		
+#define NFS_fsinfo_sz		
+#define NFS_pathconf_sz		
 #define NFS_entry_sz		NFS_filename_sz+3
 
 #define NFS_enc_void_sz		0
-#define NFS_diropargs_sz	NFS_fhandle_sz+NFS_filename_sz
-#define NFS_sattrargs_sz	NFS_fhandle_sz+NFS_sattr_sz
-#define NFS_readargs_sz		NFS_fhandle_sz+3
-#define NFS_writeargs_sz	NFS_fhandle_sz+4
+#define NFS_sattrargs_sz	(NFS_fh_sz+NFS_sattr_sz+3)
+#define NFS_diropargs_sz	(NFS_fh_sz+NFS_filename_sz)
+#define NFS_accessargs_sz	(NFS_fh_sz+1)
+#define NFS_readlinkargs_sz	NFS_fh_sz
+#define NFS_readargs_sz		(NFS_fh_sz+3)
+#define NFS_writeargs_sz	(NFS_fh_sz+5)
 #define NFS_createargs_sz	NFS_diropargs_sz+NFS_sattr_sz
-#define NFS_renameargs_sz	NFS_diropargs_sz+NFS_diropargs_sz
-#define NFS_linkargs_sz		NFS_fhandle_sz+NFS_diropargs_sz
+#define NFS_mkdirargs_sz	(NFS_diropargs_sz+NFS_sattr_sz)
 #define NFS_symlinkargs_sz	NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz
-#define NFS_readdirargs_sz	NFS_fhandle_sz+2
+#define NFS_mknodargs_sz	(NFS_diropargs_sz+2+NFS_sattr_sz)
+#define NFS_renameargs_sz	(NFS_diropargs_sz+NFS_diropargs_sz)
+#define NFS_linkargs_sz		(NFS_fh_sz+NFS_diropargs_sz)
+#define NFS_readdirargs_sz	(NFS_fh_sz+2)
+#define NFS_commitargs_sz	(NFS_fh_sz+3)
 
 #define NFS_dec_void_sz		0
 #define NFS_attrstat_sz		1+NFS_fattr_sz
-#define NFS_diropres_sz		1+NFS_fhandle_sz+NFS_fattr_sz
-#define NFS_readlinkres_sz	1+NFS_path_sz
-#define NFS_readres_sz		1+NFS_fattr_sz+1
-#define NFS_stat_sz		1
-#define NFS_readdirres_sz	1
-#define NFS_statfsres_sz	1+NFS_info_sz
+#define NFS_wccstat_sz		(1+NFS_wcc_data_sz)
+#define NFS_lookupres_sz	(1+NFS_fh_sz+(2 * NFS_post_op_attr_sz))	/* status + fh + two post_op_attr */
+#define NFS_accessres_sz	(1+NFS_post_op_attr_sz+1)
+#define NFS_readlinkres_sz	(1+NFS_post_op_attr_sz)
+#define NFS_readres_sz		(1+NFS_post_op_attr_sz+3)
+#define NFS_writeres_sz		(1+NFS_wcc_data_sz+4)
+#define NFS_createres_sz	(1+NFS_fh_sz+NFS_post_op_attr_sz+NFS_wcc_data_sz)
+#define NFS_renameres_sz	(1+(2 * NFS_wcc_data_sz))	/* status + two wcc_data */
+#define NFS_linkres_sz		(1+NFS_post_op_attr_sz+NFS_wcc_data_sz)
+#define NFS_readdirres_sz	(1+NFS_post_op_attr_sz+2)
+#define NFS_fsstatres_sz	(1+NFS_post_op_attr_sz+13)
+#define NFS_fsinfores_sz	(1+NFS_post_op_attr_sz+12)
+#define NFS_pathconfres_sz	(1+NFS_post_op_attr_sz+6)
+#define NFS_commitres_sz	(1+NFS_wcc_data_sz+2)
+
+/*
+ * Map file type to S_IFMT bits and NFSv2-style type; indexed by the wire type in xdr_decode_fattr()
+ */
+static struct {
+	unsigned int	mode;		/* format bits OR-ed into fattr->mode */
+	unsigned int	nfs2type;	/* value stored in fattr->type */
+} nfs_type2fmt[] = {
+      { 0,		NFNON	},
+      { S_IFREG,	NFREG	},
+      { S_IFDIR,	NFDIR	},
+      { S_IFBLK,	NFBLK	},
+      { S_IFCHR,	NFCHR	},
+      { S_IFLNK,	NFLNK	},
+      { S_IFSOCK,	NFSOCK	},
+      { S_IFIFO,	NFFIFO	},
+      { 0,		NFBAD	}	/* presumably the NF3BAD slot used for out-of-range types -- confirm */
+};
 
 /*
  * Common NFS XDR functions as inlines
  */
 static inline u32 *
-xdr_encode_fhandle(u32 *p, struct nfs3_fh *fh)
+xdr_encode_fhandle(u32 *p, struct nfs_fh *fh)
 {
 	*p++ = htonl(fh->size);
 	memcpy(p, fh->data, fh->size);
-	return p + QUADLEN(fh->size);
+	return p + XDR_QUADLEN(fh->size);	/* step over the handle bytes, counted in 32-bit words */
 }
 
 static inline u32 *
-xdr_decode_fhandle(u32 *p, struct nfs3_fh *fh)
+xdr_decode_fhandle(u32 *p, struct nfs_fh *fh)
 {
+	/*
+	 * Zero the whole handle first; bytes not overwritten below stay 0
+	 */
+	memset((u8 *)fh, 0, sizeof(*fh));
 	if ((fh->size = ntohl(*p++)) <= NFS3_FHSIZE) {
 		memcpy(fh->data, p, fh->size);
-		return p + QUADLEN(fh->size);
+		return p + XDR_QUADLEN(fh->size);	/* next word after the handle data */
 	}
 	return NULL;
 }
 
-static inline enum nfs_ftype
-xdr_decode_ftype(u32 type)
+/*
+ * Encode/decode time.
+ * The encoder sends whole seconds and a zero nanosecond word; the decoder
+ * packs seconds (high 32 bits) and nanoseconds (low 32 bits) into one u64.
+ */
+static inline u32 *
+xdr_encode_time(u32 *p, time_t time)
+{
+	*p++ = htonl(time);	/* seconds */
+	*p++ = 0;		/* nanoseconds: always sent as zero */
+	return p;
+}
+
+static inline u32 *
+xdr_decode_time(u32 *p, u64 *timep)
 {
-	return (type == NF3FIFO)? NFFIFO : (enum nfs_ftype) type;
+	*timep = ((u64)ntohl(p[0]) << 32) + (u64)ntohl(p[1]);	/* high word = seconds, low word = nseconds; indexed, not two unsequenced *p++ */
+	return p + 2;						/* both time words consumed */
 }
 
 static inline u32 *
@@ -99,47 +157,105 @@
 	if (*len > maxlen)
 		return NULL;
 	*string = (char *) p;
-	return p + QUADLEN(*len);
+	return p + XDR_QUADLEN(*len);
 }
 
 static inline u32 *
-xdr_decode_fattr(u32 *p, struct nfs3_fattr *fattr)
+xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
 {
-	fattr->type = xdr_decode_ftype(ntohl(*p++));
-	fattr->mode = ntohl(*p++);
+	unsigned int	type;
+	int		fmode;
+
+	type = ntohl(*p++);
+	if (type >= NF3BAD)			/* clamp unknown wire types to the NF3BAD table slot */
+		type = NF3BAD;
+	fmode = nfs_type2fmt[type].mode;
+	fattr->type = nfs_type2fmt[type].nfs2type;
+	fattr->mode = (ntohl(*p++) & ~S_IFMT) | fmode;	/* permission bits from the wire, format bits from the table */
 	fattr->nlink = ntohl(*p++);
 	fattr->uid = ntohl(*p++);
 	fattr->gid = ntohl(*p++);
-	fattr->size = ((u64) ntohl(*p++) << 32) | ntohl(*p++);
-	fattr->used = ((u64) ntohl(*p++) << 32) | ntohl(*p++);
-	fattr->rdev_maj = ntohl(*p++);
-	fattr->rdev_min = ntohl(*p++);
-	fattr->fsid = ntohl(*p++);
-	fattr->fileid = ntohl(*p++);
-	fattr->atime.seconds = ntohl(*p++);
-	fattr->atime.useconds = ntohl(*p++);
-	fattr->mtime.seconds = ntohl(*p++);
-	fattr->mtime.useconds = ntohl(*p++);
-	fattr->ctime.seconds = ntohl(*p++);
-	fattr->ctime.useconds = ntohl(*p++);
+	p = xdr_decode_hyper(p, &fattr->size);
+	p = xdr_decode_hyper(p, &fattr->du.nfs3.used);
+	/* Turn remote device info (major word, then minor word) into Linux-specific dev_t */
+	fattr->rdev = (ntohl(p[0]) << MINORBITS) | (ntohl(p[1]) & MINORMASK);
+	p = xdr_decode_hyper(p + 2, &fattr->fsid);	/* p + 2: step over the two rdev words read by index above */
+	p = xdr_decode_hyper(p, &fattr->fileid);
+	p = xdr_decode_time(p, &fattr->atime);
+	p = xdr_decode_time(p, &fattr->mtime);
+	p = xdr_decode_time(p, &fattr->ctime);
+
+	/* Flag the decoded attributes as valid */
+	fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3);
 	return p;
 }
 
 static inline u32 *
-xdr_encode_sattr(u32 *p, struct nfs_sattr *sattr)
+xdr_encode_sattr(u32 *p, struct iattr *attr)
+{
+	if (attr->ia_valid & ATTR_MODE) {	/* each field: a "set it" flag word, then the value if set */
+		*p++ = xdr_one;
+		*p++ = htonl(attr->ia_mode);
+	} else {
+		*p++ = xdr_zero;
+	}
+	if (attr->ia_valid & ATTR_UID) {
+		*p++ = xdr_one;
+		*p++ = htonl(attr->ia_uid);
+	} else {
+		*p++ = xdr_zero;
+	}
+	if (attr->ia_valid & ATTR_GID) {
+		*p++ = xdr_one;
+		*p++ = htonl(attr->ia_gid);
+	} else {
+		*p++ = xdr_zero;
+	}
+	if (attr->ia_valid & ATTR_SIZE) {
+		*p++ = xdr_one;
+		p = xdr_encode_hyper(p, (__u64) attr->ia_size);
+	} else {
+		*p++ = xdr_zero;
+	}
+	if (attr->ia_valid & ATTR_ATIME_SET) {
+		*p++ = xdr_two;			/* discriminant 2: an explicit time follows */
+		p = xdr_encode_time(p, attr->ia_atime);
+	} else if (attr->ia_valid & ATTR_ATIME) {
+		*p++ = xdr_one;			/* discriminant 1: no time value is sent */
+	} else {
+		*p++ = xdr_zero;		/* discriminant 0: atime not touched by this request */
+	}
+	if (attr->ia_valid & ATTR_MTIME_SET) {
+		*p++ = xdr_two;			/* same three-way encoding as atime */
+		p = xdr_encode_time(p, attr->ia_mtime);
+	} else if (attr->ia_valid & ATTR_MTIME) {
+		*p++ = xdr_one;
+	} else {
+		*p++ = xdr_zero;
+	}
+	return p;
+}
+
+static u32 *
+xdr_decode_post_op_attr(u32 *p, struct nfs_fattr *fattr)
 {
-	*p++ = htonl(sattr->mode);
-	*p++ = htonl(sattr->uid);
-	*p++ = htonl(sattr->gid);
-	*p++ = htonl(sattr->size >> 32);
-	*p++ = htonl(sattr->size & 0xFFFFFFFF);
-	*p++ = htonl(sattr->atime.seconds);
-	*p++ = htonl(sattr->atime.useconds);
-	*p++ = htonl(sattr->mtime.seconds);
-	*p++ = htonl(sattr->mtime.useconds);
+	if (*p++)		/* "attributes follow" flag; a nonzero test needs no byte swap */
+		p = xdr_decode_fattr(p, fattr);
 	return p;
 }
 
+static u32 *
+xdr_decode_wcc_data(u32 *p, struct nfs_fattr *fattr)
+{
+	if (ntohl(*p++) != 0) {		/* pre-operation attributes are present */
+		p = xdr_decode_hyper(p, &fattr->pre_size);
+		p = xdr_decode_time(p, &fattr->pre_mtime);
+		p = xdr_decode_time(p, &fattr->pre_ctime);
+		fattr->valid |= NFS_ATTR_WCC;	/* record that the pre_* fields were filled in */
+	}
+	return xdr_decode_post_op_attr(p, fattr);	/* post-operation half is decoded either way */
+}
+
 /*
  * NFS encode functions
  */
@@ -147,7 +263,7 @@
  * Encode void argument
  */
 static int
-nfs_xdr_enc_void(struct rpc_rqst *req, u32 *p, void *dummy)
+nfs3_xdr_enc_void(struct rpc_rqst *req, u32 *p, void *dummy)
 {
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
 	return 0;
@@ -155,10 +271,9 @@
 
 /*
  * Encode file handle argument
- * GETATTR, READLINK, STATFS
  */
 static int
-nfs_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs3_fh *fh)
+nfs3_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs_fh *fh)
 {
 	p = xdr_encode_fhandle(p, fh);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
@@ -169,23 +284,37 @@
  * Encode SETATTR arguments
  */
 static int
-nfs_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs_sattrargs *args)
+nfs3_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs3_sattrargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fh);
 	p = xdr_encode_sattr(p, args->sattr);
+	*p++ = htonl(args->guard);		/* guard flag word */
+	if (args->guard)			/* the guard time is sent only when the flag is set */
+		p = xdr_encode_time(p, args->guardtime);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
 	return 0;
 }
 
 /*
  * Encode directory ops argument
- * LOOKUP, REMOVE, RMDIR
  */
 static int
-nfs_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs_diropargs *args)
+nfs3_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs3_diropargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_string(p, args->name, args->len);	/* name is passed with an explicit length */
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);	/* record the send length for everything encoded so far */
+	return 0;
+}
+
+/*
+ * Encode access() argument: file handle followed by the requested access mask word
+ */
+static int
+nfs3_xdr_accessargs(struct rpc_rqst *req, u32 *p, struct nfs3_accessargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fh);
-	p = xdr_encode_string(p, args->name);
+	*p++ = htonl(args->access);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
 	return 0;
 }
@@ -193,149 +322,179 @@
 /*
  * Arguments to a READ call. Since we read data directly into the page
  * cache, we also set up the reply iovec here so that iov[1] points
- * exactly to the page wewant to fetch.
+ * exactly to the page we want to fetch (rvec[1..nriov] are copied from args->iov).
  */
 static int
-nfs_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args)
+nfs3_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args)
 {
 	struct rpc_auth	*auth = req->rq_task->tk_auth;
-	int		replen, buflen;
+	int		buflen, replen;
+	unsigned int	nr;
 
 	p = xdr_encode_fhandle(p, args->fh);
-	*p++ = htonl(args->offset);
-	*p++ = htonl(args->count);
+	p = xdr_encode_hyper(p, args->offset);
 	*p++ = htonl(args->count);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
 
-#if 1
+	/* Get the number of buffers in the receive iovec */
+	nr = args->nriov;
+
+	if (nr+2 > MAX_IOVEC) {	/* need slots for the header iov, nr data iovs and the tail iov */
+		printk(KERN_ERR "NFS: Bad number of iov's in xdr_readargs\n");
+		return -EINVAL;
+	}
+
 	/* set up reply iovec */
 	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2;
 	buflen = req->rq_rvec[0].iov_len;
 	req->rq_rvec[0].iov_len  = replen;
-	req->rq_rvec[1].iov_base = args->buffer;
-	req->rq_rvec[1].iov_len  = args->count;
-	req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen;
-	req->rq_rvec[2].iov_len  = buflen - replen;
+
+	/* Copy the caller's data iovec into slots 1..nr */
+	memcpy(req->rq_rvec + 1, args->iov, nr * sizeof(struct iovec));
+
+	req->rq_rvec[nr+1].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen;	/* tail: rest of the original first buffer */
+	req->rq_rvec[nr+1].iov_len  = buflen - replen;
 	req->rq_rlen = args->count + buflen;
-	req->rq_rnr = 3;
-#else
-	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2;
-	req->rq_rvec[0].iov_len  = replen;
-#endif
+	req->rq_rnr += nr+1;	/* nr data iovs plus the tail iov */
 
 	return 0;
 }
 
 /*
- * Decode READ reply
+ * Write arguments. Splice the buffer to be written into the iovec.
  */
 static int
-nfs_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res)
+nfs3_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
 {
-	struct iovec *iov = req->rq_rvec;
-	int	status, count, recvd, hdrlen;
+	unsigned int	nr;
+	u32 count = args->count;	/* reused below to compute the pad length */
 
-	dprintk("RPC:      readres OK status %lx\n", ntohl(*p));
-	if ((status = ntohl(*p++)))
-		return -nfs_stat_to_errno(status);
-	p = xdr_decode_fattr(p, res->fattr);
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_hyper(p, args->offset);
+	*p++ = htonl(args->count);
+	*p++ = htonl(args->stable);
+	*p++ = htonl(args->count);	/* count is sent a second time, after the stable word */
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
 
-	count = ntohl(*p++);
-	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
-	recvd = req->rq_rlen - hdrlen;
-	if (p != iov[2].iov_base) {
-		/* Unexpected reply header size. Punt.
-		 * XXX: Move iovec contents to align data on page
-		 * boundary and adjust RPC header size guess */
-		printk("NFS: Odd RPC header size in read reply: %d\n", hdrlen);
-		return -errno_NFSERR_IO;
-	}
-	if (count > recvd) {
-		printk("NFS: server cheating in read reply: "
-			"count %d > recvd %d\n", count, recvd);
-		count = recvd;
+	/* Get the number of buffers in the send iovec */
+	nr = args->nriov;
+
+	if (nr+2 > MAX_IOVEC) {	/* header iov + nr data iovs + one possible pad iov */
+		printk(KERN_ERR "NFS: Bad number of iov's in xdr_writeargs\n");
+		return -EINVAL;
 	}
 
-	dprintk("RPC:      readres OK count %d\n", count);
-	if (count < res->count)
-		memset((u8 *)(iov[1].iov_base+count), 0, res->count-count);
+	/* Copy the caller's data iovec into slots 1..nr */
+	memcpy(req->rq_svec + 1, args->iov, nr * sizeof(struct iovec));
+	req->rq_slen += args->count;
+	req->rq_snr  += nr;
 
-	return count;
-}
+#ifdef NFS_PAD_WRITES
+	/*
+	 * Some old servers require that the message length
+	 * be a multiple of 4, so we pad it here if needed.
+	 */
+	count = ((count + 3) & ~3) - count;	/* 0..3 pad bytes */
+	if (count) {
+		req->rq_svec[req->rq_snr].iov_base = (void *) "\0\0\0";
+		req->rq_svec[req->rq_snr].iov_len  = count;
+		req->rq_slen += count;
+		req->rq_snr++;
+	}
+#endif
 
+	return 0;
+}
 
 /*
- * Write arguments. Splice the buffer to be written into the iovec.
+ * Encode CREATE arguments: dir handle, name, create mode, then verifier or attributes
  */
 static int
-nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
+nfs3_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs3_createargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fh);
-	*p++ = htonl(args->offset);
-	*p++ = htonl(args->offset);
-	*p++ = htonl(args->count);
-	*p++ = htonl(args->count);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-
-	req->rq_svec[1].iov_base = (void *) args->buffer;
-	req->rq_svec[1].iov_len = args->count;
-	req->rq_slen += args->count;
-	req->rq_snr = 2;
+	p = xdr_encode_string(p, args->name, args->len);
 
+	*p++ = htonl(args->createmode);
+	if (args->createmode == NFS3_CREATE_EXCLUSIVE) {
+		*p++ = args->verifier[0];	/* verifier words are copied without byte swapping */
+		*p++ = args->verifier[1];
+	} else {
+		p = xdr_encode_sattr(p, args->sattr);	/* every other mode carries initial attributes */
+	}
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
 	return 0;
 }
 
 /*
- * Encode create arguments
- * CREATE, MKDIR
+ * Encode MKDIR arguments: dir handle, name, then the attributes
+ * (unlike CREATE above, no create mode word is sent)
  */
 static int
-nfs_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs_createargs *args)
+nfs3_xdr_mkdirargs(struct rpc_rqst *req, u32 *p, struct nfs3_mkdirargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fh);
-	p = xdr_encode_string(p, args->name);
+	p = xdr_encode_string(p, args->name, args->len);
 	p = xdr_encode_sattr(p, args->sattr);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
 	return 0;
 }
 
 /*
- * Encode RENAME arguments
+ * Encode SYMLINK arguments: (fromfh, fromname), attributes, then the target path
  */
 static int
-nfs_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs_renameargs *args)
+nfs3_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_symlinkargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fromfh);
-	p = xdr_encode_string(p, args->fromname);
-	p = xdr_encode_fhandle(p, args->tofh);
-	p = xdr_encode_string(p, args->toname);
+	p = xdr_encode_string(p, args->fromname, args->fromlen);
+	p = xdr_encode_sattr(p, args->sattr);	/* attributes precede the path here (the removed v2-style encoder sent them last) */
+	p = xdr_encode_string(p, args->topath, args->tolen);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
 	return 0;
 }
 
 /*
- * Encode LINK arguments
+ * Encode MKNOD arguments: dir handle, name, type, attributes, device numbers for CHR/BLK only
+ */
+static int
+nfs3_xdr_mknodargs(struct rpc_rqst *req, u32 *p, struct nfs3_mknodargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_string(p, args->name, args->len);
+	*p++ = htonl(args->type);
+	p = xdr_encode_sattr(p, args->sattr);
+	if (args->type == NF3CHR || args->type == NF3BLK) {
+		*p++ = htonl(args->rdev >> MINORBITS);	/* major number */
+		*p++ = htonl(args->rdev & MINORMASK);	/* minor number */
+	}
+
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode RENAME arguments: (fromfh, fromname) followed by (tofh, toname)
  */
 static int
-nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args)
+nfs3_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs3_renameargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fromfh);
+	p = xdr_encode_string(p, args->fromname, args->fromlen);
 	p = xdr_encode_fhandle(p, args->tofh);
-	p = xdr_encode_string(p, args->toname);
+	p = xdr_encode_string(p, args->toname, args->tolen);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
 	return 0;
 }
 
 /*
- * Encode SYMLINK arguments
+ * Encode LINK arguments: fromfh, then (tofh, toname)
  */
 static int
-nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args)
+nfs3_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs3_linkargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fromfh);
-	p = xdr_encode_string(p, args->fromname);
-	p = xdr_encode_string(p, args->topath);
-	p = xdr_encode_sattr(p, args->sattr);
+	p = xdr_encode_fhandle(p, args->tofh);
+	p = xdr_encode_string(p, args->toname, args->tolen);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
 	return 0;
 }
@@ -344,131 +503,163 @@
  * Encode arguments to readdir call
  */
 static int
-nfs_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs_readdirargs *args)
+nfs3_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs3_readdirargs *args)
 {
 	struct rpc_auth	*auth = req->rq_task->tk_auth;
-	int		replen;
+	int		buflen, replen;
 
 	p = xdr_encode_fhandle(p, args->fh);
-	*p++ = htonl(args->cookie);
+	p = xdr_encode_hyper(p, args->cookie);
+	*p++ = args->verf[0];	/* verifier words are copied without byte swapping */
+	*p++ = args->verf[1];
+	if (args->plus) {
+		/* readdirplus: need dircount + buffer size.
+		 * dircount is sent as one eighth of the buffer size */
+		*p++ = htonl(args->bufsiz >> 3);
+	}
 	*p++ = htonl(args->bufsiz);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
 
 	/* set up reply iovec */
 	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2;
-	/*
-	dprintk("RPC: readdirargs: slack is 4 * (%d + %d + %d) = %d\n",
-		RPC_REPHDRSIZE, auth->au_rslack, NFS_readdirres_sz, replen);
-	 */
+	buflen = req->rq_rvec[0].iov_len;	/* original size of the first buffer, before it is trimmed */
 	req->rq_rvec[0].iov_len  = replen;
 	req->rq_rvec[1].iov_base = args->buffer;
 	req->rq_rvec[1].iov_len  = args->bufsiz;
-	req->rq_rlen = replen + args->bufsiz;
-	req->rq_rnr = 2;
-
-	/*
-	dprintk("RPC:      readdirargs set up reply vec:\n");
-	dprintk("          rvec[0] = %p/%d\n",
-			req->rq_rvec[0].iov_base,
-			req->rq_rvec[0].iov_len);
-	dprintk("          rvec[1] = %p/%d\n",
-			req->rq_rvec[1].iov_base,
-			req->rq_rvec[1].iov_len);
-	 */
+	req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen;	/* tail: rest of the first buffer */
+	req->rq_rvec[2].iov_len  = buflen - replen;
+	req->rq_rlen = buflen + args->bufsiz;
+	req->rq_rnr += 2;	/* data iov plus tail iov */
 
 	return 0;
 }
 
 /*
- * Decode the result of a readdir call. We decode the result in place
- * to avoid a malloc of NFS_MAXNAMLEN+1 for each file name.
- * After decoding, the layout in memory looks like this:
- *	entry1 entry2 ... entryN <space> stringN ... string2 string1
- * Each entry consists of three __u32 values, the same space as NFS uses.
- * Note that the strings are not null-terminated so that the entire number
- * of entries returned by the server should fit into the buffer.
- */
-static int
-nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res)
-{
-	struct iovec		*iov = req->rq_rvec;
-	int			status, nr, len;
-	char			*string, *start;
-	u32			*end;
-	__u32			fileid, cookie, *entry;
-
-	if ((status = ntohl(*p++)))
+ * Decode the result of a readdir call.
+ * We just check for syntactical correctness; returns the entry count or a negative errno.
+ */
+static int
+nfs3_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs3_readdirres *res)
+{
+	struct iovec	*iov = req->rq_rvec;
+	int		hdrlen;
+	int		status, nr;
+	unsigned int	len;
+	u32		*entry, *end;
+
+	status = ntohl(*p++);
+	/* Decode post_op_attrs (done before the status check, so also on failure) */
+	p = xdr_decode_post_op_attr(p, res->dir_attr);
+	if (status)
 		return -nfs_stat_to_errno(status);
-	if ((void *) p != ((u8 *) iov->iov_base+iov->iov_len)) {
-		/* Unexpected reply header size. Punt. */
-		printk("NFS: Odd RPC header size in readdirres reply\n");
-		return -errno_NFSERR_IO;
+	/* Decode verifier cookie */
+	if (res->verf) {
+		res->verf[0] = *p++;
+		res->verf[1] = *p++;
+	} else {
+		p += 2;	/* caller does not want the verifier: skip it */
 	}
 
-	p = (u32 *) iov[1].iov_base;
-	end = (u32 *) ((u8 *) p + iov[1].iov_len);
-
-	if (p != res->buffer) {
-		printk("NFS: p != res->buffer in %s:%d!!!\n",
-					__FILE__, __LINE__);
-		return -errno_NFSERR_IO;
+	hdrlen = (u8 *) p - (u8 *) iov->iov_base;	/* bytes of header actually consumed */
+	if (iov->iov_len > hdrlen) {
+		dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
+		xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen);
 	}
 
-	entry  = (__u32 *) res->buffer;
-	start  = (char *) res->buffer;
-	string = start + res->bufsiz;
+	p   = (u32 *) iov[1].iov_base;	/* entries are walked in the data buffer */
+	end = (u32 *) ((u8 *) p + iov[1].iov_len);
 	for (nr = 0; *p++; nr++) {
-		fileid = ntohl(*p++);
-
-		len = ntohl(*p++);
-		if ((p + QUADLEN(len) + 3) > end) {
-			printk(KERN_NOTICE
-				"NFS: short packet in readdir reply!\n");
-			break;
-		}
-		if (len > NFS_MAXNAMLEN) {
-			printk("NFS: giant filename in readdir (len %x)!\n",
+		entry = p - 1;			/* the "entry follows" word of this entry */
+		p += 2;				/* inode # */
+		len = ntohl(*p++);		/* string length */
+		p += XDR_QUADLEN(len) + 2;	/* name + cookie */
+		if (len > NFS3_MAXNAMLEN) {
+			printk(KERN_WARNING "NFS: giant filename in readdir (len %x)!\n",
 						len);
 			return -errno_NFSERR_IO;
 		}
-		string -= len;
-		if ((void *) (entry+3) > (void *) string) {
-			/* 
-			 * This error is impossible as long as the temp
-			 * buffer is no larger than the user buffer. The 
-			 * current packing algorithm uses the same amount
-			 * of space in the user buffer as in the XDR data,
-			 * so it's guaranteed to fit.
-			 */
-			printk("NFS: incorrect buffer size in %s!\n",
-				__FUNCTION__);
+
+		if (res->plus) {
+			/* post_op_attr */
+			if (*p++)
+				p += 21;	/* skip the attribute body: 21 words, cf. NFS_fattr_sz */
+			/* post_op_fh3 */
+			if (*p++) {
+				len = ntohl(*p++);
+				if (len > NFS3_FHSIZE) {
+					printk(KERN_WARNING "NFS: giant filehandle in "
+						"readdir (len %x)!\n", len);
+					return -errno_NFSERR_IO;
+				}
+				p += XDR_QUADLEN(len);
+			}
+		}
+
+		if (p + 2 > end) {	/* NOTE(review): the words above were read before this bound check */
+			printk(KERN_NOTICE
+				"NFS: short packet in readdir reply!\n");
+			/* truncate listing */
+			entry[0] = entry[1] = 0;
 			break;
 		}
+	}
 
-		memmove(string, p, len);
-		p += QUADLEN(len);
-		cookie = ntohl(*p++);
-		/*
-		 * To make everything fit, we encode the length, offset,
-		 * and eof flag into 32 bits. This works for filenames
-		 * up to 32K and PAGE_SIZE up to 64K.
-		 */
-		status = !p[0] && p[1] ? (1 << 15) : 0; /* eof flag */
-		*entry++ = fileid;
-		*entry++ = cookie;
-		*entry++ = ((string - start) << 16) | status | (len & 0x7FFF);
-		/*
-		dprintk("NFS: decoded dirent %.*s cookie %d eof %d\n",
-			len, string, cookie, status);
-		 */
-	}
-#ifdef NFS_PARANOIA
-printk("nfs_xdr_readdirres: %d entries, ent sp=%d, str sp=%d\n",
-nr, ((char *) entry - start), (start + res->bufsiz - string));
-#endif
 	return nr;
 }
 
+u32 *
+nfs3_decode_dirent(u32 *p, struct nfs_entry *entry, int plus)
+{
+	struct nfs_entry old = *entry;	/* snapshot, restored if the file handle cannot be decoded */
+
+	if (!*p++) {			/* no further entry follows */
+		if (!*p)		/* ... and the next word (presumably the eof flag) is clear */
+			return ERR_PTR(-EAGAIN);
+		entry->eof = 1;
+		return ERR_PTR(-EBADCOOKIE);
+	}
+
+	p = xdr_decode_hyper(p, &entry->ino);
+	entry->len  = ntohl(*p++);
+	entry->name = (const char *) p;	/* points into the reply buffer; no copy is made */
+	p += XDR_QUADLEN(entry->len);
+	p = xdr_decode_hyper(p, &entry->cookie);
+
+	if (plus) {
+		p = xdr_decode_post_op_attr(p, &entry->fattr);
+		/* In fact, a post_op_fh3: */
+		if (*p++) {
+			p = xdr_decode_fhandle(p, &entry->fh);
+			/* NULL here means the handle length exceeded NFS3_FHSIZE */
+			if (p == NULL) {
+				dprintk("NFS: FH truncated\n");
+				*entry = old;
+				return ERR_PTR(-EAGAIN);
+			}
+		} else {
+			/* If we don't get a file handle, the attrs
+			 * aren't worth a lot. */
+			entry->fattr.valid = 0;
+		}
+	}
+
+	entry->eof = !p[0] && p[1];	/* last entry: nothing follows and the word after that is set */
+	return p;
+}
+
+/*
+ * Encode COMMIT arguments: file handle, 64-bit offset, count (taken from a struct nfs_writeargs)
+ */
+static int
+nfs3_xdr_commitargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_hyper(p, args->offset);
+	*p++ = htonl(args->count);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
 /*
  * NFS XDR decode functions
  */
@@ -476,59 +667,94 @@
  * Decode void reply
  */
 static int
-nfs_xdr_dec_void(struct rpc_rqst *req, u32 *p, void *dummy)
+nfs3_xdr_dec_void(struct rpc_rqst *req, u32 *p, void *dummy)
 {
 	return 0;
 }
 
 /*
- * Decode simple status reply
+ * Decode attrstat reply: status word, then plain attributes on success.
  */
 static int
-nfs_xdr_stat(struct rpc_rqst *req, u32 *p, void *dummy)
+nfs3_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
 {
 	int	status;
 
-	if ((status = ntohl(*p++)) != 0)
+	if ((status = ntohl(*p++)))
 		status = -nfs_stat_to_errno(status);
+	else if (xdr_decode_fattr(p, fattr) == 0)	/* a null result pointer is treated as a decode failure */
+		return -errno_NFSERR_IO;
 	return status;
 }
 
 /*
- * Decode attrstat reply
- * GETATTR, SETATTR, WRITE
+ * Decode status+wcc_data reply (the wcc_data is decoded even when status is an error)
+ * SETATTR, REMOVE, RMDIR
  */
 static int
-nfs_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
+nfs3_xdr_wccstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
 {
 	int	status;
 
-	dprintk("RPC:      attrstat status %lx\n", ntohl(*p));
 	if ((status = ntohl(*p++)))
-		return -nfs_stat_to_errno(status);
-	xdr_decode_fattr(p, fattr);
-	dprintk("RPC:      attrstat OK type %d mode %o dev %x ino %x\n",
-		fattr->type, fattr->mode, fattr->fsid, fattr->fileid);
-	return 0;
+		status = -nfs_stat_to_errno(status);
+	xdr_decode_wcc_data(p, fattr);
+	return status;
+}
+
+/*
+ * Decode LOOKUP reply
+ */
+static int
+nfs3_xdr_lookupres(struct rpc_rqst *req, u32 *p, struct nfs3_diropres *res)
+{
+	int	status;
+
+	if ((status = ntohl(*p++))) {
+		status = -nfs_stat_to_errno(status);
+	} else {
+		if (!(p = xdr_decode_fhandle(p, res->fh)))
+			return -errno_NFSERR_IO;
+		p = xdr_decode_post_op_attr(p, res->fattr);
+	}
+	xdr_decode_post_op_attr(p, res->dir_attr);
+	return status;
 }
+
+/*
+ * Decode LOOKUP reply
+ */
+static int
+nfs3_xdr_lookupres(struct rpc_rqst *req, u32 *p, struct nfs3_diropres *res)
+{
+	int	status;
+
+	if ((status = ntohl(*p++))) {
+		status = -nfs_stat_to_errno(status);
+	} else {
+		if (!(p = xdr_decode_fhandle(p, res->fh)))
+			return -errno_NFSERR_IO;
+		p = xdr_decode_post_op_attr(p, res->fattr);
+	}
+	xdr_decode_post_op_attr(p, res->dir_attr);
+	return status;
 }
 
 /*
- * Decode diropres reply
- * LOOKUP, CREATE, MKDIR
+ * Decode ACCESS reply: status, post-op attributes, then the access word on success
  */
 static int
-nfs_xdr_diropres(struct rpc_rqst *req, u32 *p, struct nfs_diropok *res)
+nfs3_xdr_accessres(struct rpc_rqst *req, u32 *p, struct nfs3_accessres *res)
 {
 	int	status;
 
-	dprintk("RPC:      diropres status %lx\n", ntohl(*p));
 	if ((status = ntohl(*p++)))
-		return -nfs_stat_to_errno(status);
-	p = xdr_decode_fhandle(p, res->fh);
-	xdr_decode_fattr(p, res->fattr);
-	dprintk("RPC:      diropres OK type %x mode %o dev %x ino %x\n",
-		res->fattr->type, res->fattr->mode,
-		res->fattr->fsid, res->fattr->fileid);
+		status = -nfs_stat_to_errno(status);
+	p = xdr_decode_post_op_attr(p, res->fattr);	/* decoded whether or not the call succeeded */
+	if (status == 0)
+		res->access = ntohl(*p++);
+	return status;
+}
+
+static int
+nfs3_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_readlinkargs *args)
+{
+	struct rpc_task *task = req->rq_task;
+	struct rpc_auth *auth = task->tk_auth;
+	int		buflen, replen;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readlinkres_sz) << 2;
+	buflen = req->rq_rvec[0].iov_len;
+	req->rq_rvec[0].iov_len  = replen;
+	req->rq_rvec[1].iov_base = args->buffer;
+	req->rq_rvec[1].iov_len  = args->bufsiz;
+	req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen;
+	req->rq_rvec[2].iov_len  = buflen - replen;
+	req->rq_rlen = buflen + args->bufsiz;
+	req->rq_rnr += 2;
 	return 0;
 }
+
+static int
+nfs3_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_readlinkargs *args)
+{
+	struct rpc_task *task = req->rq_task;
+	struct rpc_auth *auth = task->tk_auth;
+	int		buflen, replen;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readlinkres_sz) << 2;
+	buflen = req->rq_rvec[0].iov_len;
+	req->rq_rvec[0].iov_len  = replen;
+	req->rq_rvec[1].iov_base = args->buffer;
+	req->rq_rvec[1].iov_len  = args->bufsiz;
+	req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen;
+	req->rq_rvec[2].iov_len  = buflen - replen;
+	req->rq_rlen = buflen + args->bufsiz;
+	req->rq_rnr += 2;
 	return 0;
 }
 
@@ -536,155 +762,299 @@
  * Decode READLINK reply
  */
 static int
-nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs_readlinkres *res)
+nfs3_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs3_readlinkres *res)
 {
+	struct iovec	*iov = req->rq_rvec;
+	int		hdrlen;
+	u32	*lenp;	/* length word at the start of res->buffer */
+	char	*string;
 	int	status;
+	unsigned int len;
 
-	if ((status = ntohl(*p++)))
+	status = ntohl(*p++);
+	p = xdr_decode_post_op_attr(p, res->fattr);	/* decoded whether or not the call succeeded */
+
+	if (status != 0)
 		return -nfs_stat_to_errno(status);
-	xdr_decode_string2(p, res->string, res->lenp, res->maxlen);
 
-	/* Caller takes over the buffer here to avoid extra copy */
-	res->buffer = req->rq_task->tk_buffer;
-	req->rq_task->tk_buffer = NULL;
+	hdrlen = (u8 *) p - (u8 *) iov->iov_base;	/* bytes of reply header consumed so far */
+	if (iov->iov_len > hdrlen) {
+		dprintk("NFS: READLINK header is short. iovec will be shifted.\n");
+		xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen);
+	}
+
+	lenp = (u32*)res->buffer;
+	/* Convert length of symlink to host byte order, in place */
+	len = ntohl(*lenp);
+	if (len > res->bufsiz - 5)	/* leave room for the 4-byte length word and 1 terminator byte */
+		len = res->bufsiz - 5;
+	*lenp = len;
+	/* NUL-terminate the string we got */
+	string = (char *)(lenp + 1);
+	string[len] = 0;
 	return 0;
 }
 
 /*
- * Decode STATFS reply
+ * Decode READ reply
  */
 static int
-nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
+nfs3_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res)
 {
-	int	status;
+	struct iovec *iov = req->rq_rvec;
+	int	status, count, ocount, recvd, hdrlen;
 
-	if ((status = ntohl(*p++)))
+	status = ntohl(*p++);
+	p = xdr_decode_post_op_attr(p, res->fattr);
+
+	if (status != 0)
 		return -nfs_stat_to_errno(status);
-	res->tsize = ntohl(*p++);
-	res->bsize = ntohl(*p++);
-	res->blocks = ntohl(*p++);
-	res->bfree = ntohl(*p++);
-	res->bavail = ntohl(*p++);
-	return 0;
+
+	/* Decode reply count and EOF flag. NFSv3 is somewhat redundant
+	 * in that it puts the count both in the res struct and in the
+	 * opaque data count. */
+	count    = ntohl(*p++);
+	res->eof = ntohl(*p++);
+	ocount   = ntohl(*p++);
+
+	if (ocount != count) {
+		printk(KERN_WARNING "NFS: READ count doesn't match RPC opaque count.\n");
+		return -errno_NFSERR_IO;
+	}
+
+	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
+	if (iov->iov_len > hdrlen) {
+		dprintk("NFS: READ header is short. iovec will be shifted.\n");
+		xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen);
+	}
+
+	recvd = req->rq_rlen - iov->iov_len;
+	if (count > recvd) {
+		printk(KERN_WARNING "NFS: server cheating in read reply: "
+			"count %d > recvd %d\n", count, recvd);
+		count = recvd;
+	}
+
+	if (count < res->count) {
+		xdr_zero_iovec(iov+1, req->rq_rnr-2, res->count - count);
+		res->count = count;
+	}
+
+	return count;
 }
 
 /*
- * We need to translate between nfs status return values and
- * the local errno values which may not be the same.
+ * Decode WRITE response
  */
-static struct {
-	int stat;
-	int errno;
-} nfs_errtbl[] = {
-	{ NFS_OK,		0		},
-	{ NFSERR_PERM,		EPERM		},
-	{ NFSERR_NOENT,		ENOENT		},
-	{ NFSERR_IO,		errno_NFSERR_IO	},
-	{ NFSERR_NXIO,		ENXIO		},
-	{ NFSERR_EAGAIN,	EAGAIN		},
-	{ NFSERR_ACCES,		EACCES		},
-	{ NFSERR_EXIST,		EEXIST		},
-	{ NFSERR_XDEV,		EXDEV		},
-	{ NFSERR_NODEV,		ENODEV		},
-	{ NFSERR_NOTDIR,	ENOTDIR		},
-	{ NFSERR_ISDIR,		EISDIR		},
-	{ NFSERR_INVAL,		EINVAL		},
-	{ NFSERR_FBIG,		EFBIG		},
-	{ NFSERR_NOSPC,		ENOSPC		},
-	{ NFSERR_ROFS,		EROFS		},
-	{ NFSERR_NAMETOOLONG,	ENAMETOOLONG	},
-	{ NFSERR_NOTEMPTY,	ENOTEMPTY	},
-	{ NFSERR_DQUOT,		EDQUOT		},
-	{ NFSERR_STALE,		ESTALE		},
-#ifdef EWFLUSH
-	{ NFSERR_WFLUSH,	EWFLUSH		},
-#endif
-	{ -1,			EIO		}
-};
+static int
+nfs3_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
+{
+	int	status;
+
+	status = ntohl(*p++);	/* first reply word is the NFS status */
+	p = xdr_decode_wcc_data(p, res->fattr);	/* decoded before the status check, so on error replies too */
+
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+
+	res->count = ntohl(*p++);
+	res->verf->committed = ntohl(*p++);
+	res->verf->verifier[0] = *p++;	/* the two verifier words are copied without byte swapping */
+	res->verf->verifier[1] = *p++;
+
+	return res->count;	/* on success the return value is the decoded count word */
+}
 
+/*
+ * Decode a CREATE response
+ */
 static int
-nfs_stat_to_errno(int stat)
+nfs3_xdr_createres(struct rpc_rqst *req, u32 *p, struct nfs3_diropres *res)
 {
-	int i;
+	int	status;
 
-	for (i = 0; nfs_errtbl[i].stat != -1; i++) {
-		if (nfs_errtbl[i].stat == stat)
-			return nfs_errtbl[i].errno;
+	status = ntohl(*p++);
+	if (status == 0) {
+		if (*p++) {
+			if (!(p = xdr_decode_fhandle(p, res->fh)))
+				return -errno_NFSERR_IO;
+			p = xdr_decode_post_op_attr(p, res->fattr);
+		} else {
+			memset(res->fh, 0, sizeof(*res->fh));
+			/* Do decode post_op_attr but set it to NULL */
+			p = xdr_decode_post_op_attr(p, res->fattr);
+			res->fattr->valid = 0;
+		}
+	} else {
+		status = -nfs_stat_to_errno(status);
 	}
-	printk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat);
-	return nfs_errtbl[i].errno;
+	p = xdr_decode_wcc_data(p, res->dir_attr);
+	return status;
 }
 
-#ifndef MAX
-# define MAX(a, b)	(((a) > (b))? (a) : (b))
-#endif
+/*
+ * Decode RENAME reply
+ */
+static int
+nfs3_xdr_renameres(struct rpc_rqst *req, u32 *p, struct nfs3_renameres *res)
+{
+	int	status;
 
-#define PROC(proc, argtype, restype)	\
-    { "nfs_" #proc,					\
-      (kxdrproc_t) nfs_xdr_##argtype,			\
-      (kxdrproc_t) nfs_xdr_##restype,			\
-      MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2	\
-    }
+	if ((status = ntohl(*p++)) != 0)
+		status = -nfs_stat_to_errno(status);
+	p = xdr_decode_wcc_data(p, res->fromattr);
+	p = xdr_decode_wcc_data(p, res->toattr);
+	return status;
+}
 
-static struct rpc_procinfo	nfs_procedures[18] = {
-    PROC(null,		enc_void,	dec_void),
-    PROC(getattr,	fhandle,	attrstat),
-    PROC(setattr,	sattrargs,	attrstat),
-    PROC(root,		enc_void,	dec_void),
-    PROC(lookup,	diropargs,	diropres),
-    PROC(readlink,	fhandle,	readlinkres),
-    PROC(read,		readargs,	readres),
-    PROC(writecache,	enc_void,	dec_void),
-    PROC(write,		writeargs,	attrstat),
-    PROC(create,	createargs,	diropres),
-    PROC(remove,	diropargs,	stat),
-    PROC(rename,	renameargs,	stat),
-    PROC(link,		linkargs,	stat),
-    PROC(symlink,	symlinkargs,	stat),
-    PROC(mkdir,		createargs,	diropres),
-    PROC(rmdir,		diropargs,	stat),
-    PROC(readdir,	readdirargs,	readdirres),
-    PROC(statfs,	fhandle,	statfsres),
-};
+/*
+ * Decode LINK reply
+ */
+static int
+nfs3_xdr_linkres(struct rpc_rqst *req, u32 *p, struct nfs3_linkres *res)
+{
+	int	status;
 
-static struct rpc_version	nfs_version2 = {
-	2,
-	sizeof(nfs_procedures)/sizeof(nfs_procedures[0]),
-	nfs_procedures
-};
+	if ((status = ntohl(*p++)) != 0)
+		status = -nfs_stat_to_errno(status);
+	p = xdr_decode_post_op_attr(p, res->fattr);
+	p = xdr_decode_wcc_data(p, res->dir_attr);
+	return status;
+}
 
-static struct rpc_version *	nfs_version[] = {
-	NULL,
-	NULL,
-	&nfs_version2
-};
+/*
+ * Decode FSSTAT reply
+ */
+static int
+nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
+{
+	struct nfs_fattr dummy;
+	int		status;
 
-struct rpc_program	nfs_program = {
-	"nfs",
-	NFS_PROGRAM,
-	sizeof(nfs_version) / sizeof(nfs_version[0]),
-	nfs_version,
-	&nfs_rpcstat,
-};
+	status = ntohl(*p++);
+
+	p = xdr_decode_post_op_attr(p, &dummy);
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+
+	p = xdr_decode_hyper(p, &res->tbytes);
+	p = xdr_decode_hyper(p, &res->fbytes);
+	p = xdr_decode_hyper(p, &res->abytes);
+	p = xdr_decode_hyper(p, &res->tfiles);
+	p = xdr_decode_hyper(p, &res->ffiles);
+	p = xdr_decode_hyper(p, &res->afiles);
+
+	/* ignore invarsec */
+	return 0;
+}
+
+/*
+ * Decode FSINFO reply into *res; returns 0, or -nfs_stat_to_errno() of a non-zero NFS status.
+ */
+static int
+nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
+{
+	struct nfs_fattr dummy;	/* scratch target: the post-op attributes are decoded but not returned */
+	int		status;
+
+	status = ntohl(*p++);
+
+	p = xdr_decode_post_op_attr(p, &dummy);	/* done before the status check; advances p past the attributes */
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+
+	res->rtmax  = ntohl(*p++);	/* seven consecutive 32-bit words, in reply order */
+	res->rtpref = ntohl(*p++);
+	res->rtmult = ntohl(*p++);
+	res->wtmax  = ntohl(*p++);
+	res->wtpref = ntohl(*p++);
+	res->wtmult = ntohl(*p++);
+	res->dtpref = ntohl(*p++);
+	p = xdr_decode_hyper(p, &res->maxfilesize);
+
+	/* ignore time_delta and properties */
+	return 0;
+}
+
+/*
+ * Decode PATHCONF reply: only linkmax and namelen are stored in *res; returns 0 or a negated error.
+ */
+static int
+nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
+{
+	struct nfs_fattr dummy;	/* scratch target: the post-op attributes are decoded but not returned */
+	int		status;
+
+	status = ntohl(*p++);
+
+	p = xdr_decode_post_op_attr(p, &dummy);	/* done before the status check; advances p past the attributes */
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+	res->linkmax = ntohl(*p++);
+	res->namelen = ntohl(*p++);
+
+	/* ignore remaining fields */
+	return 0;
+}
 
 /*
- * RPC stats support
+ * Decode COMMIT reply
  */
 static int
-nfs_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+nfs3_xdr_commitres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
 {
-	return rpcstat_get_info(&nfs_rpcstat, buffer, start, offset, length);
+	int		status;
+
+	status = ntohl(*p++);
+	p = xdr_decode_wcc_data(p, res->fattr);
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+
+	res->verf->verifier[0] = *p++;
+	res->verf->verifier[1] = *p++;
+	return 0;
 }
 
-static struct proc_dir_entry	proc_nfsclnt = {
-	0, 3, "nfs",
-	S_IFREG | S_IRUGO, 1, 0, 0,
-	6, &proc_net_inode_operations,
-	nfs_get_info
+#ifndef MAX
+# define MAX(a, b)	(((a) > (b))? (a) : (b))
+#endif
+
+#define PROC(proc, argtype, restype)	\
+    { "nfs_" #proc,					\
+      (kxdrproc_t) nfs3_xdr_##argtype,			\
+      (kxdrproc_t) nfs3_xdr_##restype,			\
+      MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2,	\
+      0							\
+    }
+
+static struct rpc_procinfo	nfs3_procedures[22] = {	/* entries follow RFC 1813 procedure numbers 0..21 */
+  PROC(null,	 enc_void,	dec_void),
+  PROC(getattr,	 fhandle,	attrstat),
+  PROC(setattr,  sattrargs,	wccstat),
+  PROC(lookup,	 diropargs,	lookupres),
+  PROC(access,	 accessargs,	accessres),
+  PROC(readlink, readlinkargs,	readlinkres),
+  PROC(read,	 readargs,	readres),
+  PROC(write,	 writeargs,	writeres),
+  PROC(create,	 createargs,	createres),
+  PROC(mkdir,	 mkdirargs,	createres),
+  PROC(symlink,	 symlinkargs,	createres),
+  PROC(mknod,	 mknodargs,	createres),
+  PROC(remove,	 diropargs,	wccstat),
+  PROC(rmdir,	 diropargs,	wccstat),
+  PROC(rename,	 renameargs,	renameres),
+  PROC(link,	 linkargs,	linkres),
+  PROC(readdir,	 readdirargs,	readdirres),
+  PROC(readdirplus, readdirargs,	readdirres),	/* slot 17: same XDR routines as READDIR, distinct name */
+  PROC(fsstat,	 fhandle,	fsstatres),
+  PROC(fsinfo,   fhandle,	fsinfores),
+  PROC(pathconf, fhandle,	pathconfres),
+  PROC(commit,	 commitargs,	commitres),
 };
 
-struct rpc_stat			nfs_rpcstat = {
-	NULL,			/* next */
-	&proc_nfsclnt,		/* /proc/net directory entry */
-	&nfs_program,		/* RPC program */
+struct rpc_version		nfs_version3 = {
+	3,
+	sizeof(nfs3_procedures)/sizeof(nfs3_procedures[0]),
+	nfs3_procedures
 };
+
--- linux/fs/nfs/nfsroot.c.nfsattack-gafton	Thu Apr 23 21:12:21 1998
+++ linux/fs/nfs/nfsroot.c	Fri Feb  4 23:26:34 2000
@@ -61,6 +61,9 @@
  *	Martin Mares	:	Use root_server_addr appropriately during setup.
  *	Martin Mares	:	Rewrote parameter parsing, now hopefully giving
  *				correct overriding.
+ *	Trond Myklebust :	Add in preliminary support for NFSv3 and TCP.
+ *				Fix bug in root_nfs_addr(). nfs_data.namlen
+ *				is NOT for the length of the hostname.
  */
 
 #include <linux/types.h>
@@ -70,7 +73,8 @@
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/sunrpc/clnt.h>
-#include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs3.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_mount.h>
 #include <linux/in.h>
@@ -85,6 +89,7 @@
 
 /* Default path we try to mount. "%s" gets replaced by our IP address */
 #define NFS_ROOT		"/tftpboot/%s"
+#define NFS_PORT		NFS2_PORT
 
 /* Parameters passed from the kernel command line */
 static char nfs_root_name[256] __initdata = "";
@@ -93,7 +98,7 @@
 static __u32 servaddr __initdata = 0;
 
 /* Name of directory to mount */
-static char nfs_path[NFS_MAXPATHLEN] __initdata = { 0, };
+static char nfs_path[PATH_MAX] __initdata = { 0, };
 
 /* NFS-related data */
 static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */
@@ -145,6 +150,14 @@
 	{ "nocto",	~NFS_MOUNT_NOCTO,	NFS_MOUNT_NOCTO },
 	{ "ac",		~NFS_MOUNT_NOAC,	0 },
 	{ "noac",	~NFS_MOUNT_NOAC,	NFS_MOUNT_NOAC },
+	{ "lock",	~NFS_MOUNT_NONLM,	0 },
+	{ "nolock",	~NFS_MOUNT_NONLM,	NFS_MOUNT_NONLM },
+#ifdef CONFIG_NFS_V3
+	{ "v2",		~NFS_MOUNT_VER3,	0 },
+	{ "v3",		~NFS_MOUNT_VER3,	NFS_MOUNT_VER3 },
+#endif
+	{ "udp",	~NFS_MOUNT_TCP,		0 },
+	{ "tcp",	~NFS_MOUNT_TCP,		NFS_MOUNT_TCP },
 	{ NULL,		0,			0 }
 };
 
@@ -211,8 +224,8 @@
 		}
 	}
 	if (name[0] && strcmp(name, "default")) {
-		strncpy(buf, name, NFS_MAXPATHLEN-1);
-		buf[NFS_MAXPATHLEN-1] = 0;
+		strncpy(buf, name, PATH_MAX-1);
+		buf[PATH_MAX-1] = 0;
 	}
 }
 
@@ -222,17 +235,16 @@
  */
 __initfunc(static int root_nfs_name(char *name))
 {
-	char buf[NFS_MAXPATHLEN];
+	char buf[PATH_MAX];
 	char *cp;
 
 	/* Set some default values */
 	memset(&nfs_data, 0, sizeof(nfs_data));
 	nfs_port          = -1;
 	nfs_data.version  = NFS_MOUNT_VERSION;
-	nfs_data.flags    = NFS_MOUNT_NONLM;	/* No lockd in nfs root yet */
-	nfs_data.rsize    = NFS_DEF_FILE_IO_BUFFER_SIZE;
-	nfs_data.wsize    = NFS_DEF_FILE_IO_BUFFER_SIZE;
-	nfs_data.bsize	  = 0;
+	/* It is ok to have lockd in nfs root since it will be started
+	   later manually in the rc script. */
+	nfs_data.flags    = 0;
 	nfs_data.timeo    = 7;
 	nfs_data.retrans  = 3;
 	nfs_data.acregmin = 3;
@@ -248,7 +260,7 @@
 	root_nfs_parse(name, buf);
 
 	cp = system_utsname.nodename;
-	if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
+	if (strlen(buf) + strlen(cp) > PATH_MAX) {
 		printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
 		return -1;
 	}
@@ -269,7 +281,6 @@
 	}
 
 	strncpy(nfs_data.hostname, in_ntoa(servaddr), sizeof(nfs_data.hostname)-1);
-	nfs_data.namlen = strlen(nfs_data.hostname);
 	return 0;
 }
 
@@ -356,14 +367,14 @@
 /*
  *  Query server portmapper for the port of a daemon program.
  */
-__initfunc(static int root_nfs_getport(int program, int version))
+__initfunc(static int root_nfs_getport(int program, int version, int proto))
 {
 	struct sockaddr_in sin;
 
 	printk(KERN_NOTICE "Looking up port of RPC %d/%d on %s\n",
 		program, version, in_ntoa(servaddr));
 	set_sockaddr(&sin, servaddr, 0);
-	return rpc_getport_external(&sin, program, version, IPPROTO_UDP);
+	return rpc_getport_external(&sin, program, version, proto);
 }
 
 
@@ -375,22 +386,39 @@
 __initfunc(static int root_nfs_ports(void))
 {
 	int port;
+	int nfsd_ver, mountd_ver;
+	int nfsd_port, mountd_port;
+	int proto;
+
+	if (nfs_data.flags & NFS_MOUNT_VER3) {
+		nfsd_ver = NFS3_VERSION;
+		mountd_ver = NFS_MNT3_VERSION;
+		nfsd_port = NFS_PORT;
+		mountd_port = NFS_MNT_PORT;
+	} else {
+		nfsd_ver = NFS2_VERSION;
+		mountd_ver = NFS_MNT_VERSION;
+		nfsd_port = NFS_PORT;
+		mountd_port = NFS_MNT_PORT;
+	}
+
+	proto = (nfs_data.flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
 
 	if (nfs_port < 0) {
-		if ((port = root_nfs_getport(NFS_PROGRAM, NFS_VERSION)) < 0) {
+		if ((port = root_nfs_getport(NFS_PROGRAM, nfsd_ver, proto)) < 0) {
 			printk(KERN_ERR "Root-NFS: Unable to get nfsd port "
 					"number from server, using default\n");
-			port = NFS_PORT;
+			port = nfsd_port;
 		}
 		nfs_port = htons(port);
 		dprintk("Root-NFS: Portmapper on server returned %d "
 			"as nfsd port\n", port);
 	}
 
-	if ((port = root_nfs_getport(NFS_MNT_PROGRAM, NFS_MNT_VERSION)) < 0) {
+	if ((port = root_nfs_getport(NFS_MNT_PROGRAM, mountd_ver, proto)) < 0) {
 		printk(KERN_ERR "Root-NFS: Unable to get mountd port "
 				"number from server, using default\n");
-		port = NFS_MNT_PORT;
+		port = mountd_port;
 	}
 	mount_port = htons(port);
 	dprintk("Root-NFS: mountd port is %d\n", port);
@@ -409,7 +437,10 @@
 	int status;
 
 	set_sockaddr(&sin, servaddr, mount_port);
-	status = nfs_mount(&sin, nfs_path, &nfs_data.root);
+	if (nfs_data.flags & NFS_MOUNT_VER3)
+		status = nfs3_mount(&sin, nfs_path, &nfs_data.root);
+	else
+		status = nfs_mount(&sin, nfs_path, &nfs_data.root);
 	if (status < 0)
 		printk(KERN_ERR "Root-NFS: Server returned error %d "
 				"while mounting %s\n", status, nfs_path);
--- linux/fs/nfs/proc.c.nfsattack-gafton	Tue Jan 13 13:03:40 1998
+++ linux/fs/nfs/proc.c	Fri Feb  4 23:26:34 2000
@@ -13,10 +13,6 @@
  *  Note: Error returns are optimized for NFS_OK, which isn't translated via
  *  nfs_stat_to_errno(), but happens to be already the right return code.
  *
- *  FixMe: We ought to define a sensible small max size for
- *  things like getattr that are tiny packets and use the
- *  old get_free_page stuff with it.
- *
  *  Also, the code currently doesn't check the size of the packet, when
  *  it decodes the packet.
  *
@@ -25,211 +21,316 @@
  *  Completely rewritten to support the new RPC call interface;
  *  rewrote and moved the entire XDR stuff to xdr.c
  *  --Olaf Kirch June 1996
+ *
+ *  The code below initializes all auto variables explicitly, otherwise
+ *  it will fail to work as a module (gcc generates a memset call for an
+ *  incomplete struct).
  */
 
-#define NFS_NEED_XDR_TYPES
+#define NFS_NEED_NFS2_XDR_TYPES
 
+#include <linux/types.h>
 #include <linux/param.h>
+#include <linux/malloc.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
-#include <linux/malloc.h>
 #include <linux/utsname.h>
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/in.h>
 #include <linux/pagemap.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs2.h>
 #include <linux/nfs_fs.h>
 
 #include <asm/segment.h>
 
-#ifdef NFS_DEBUG
-# define NFSDBG_FACILITY	NFSDBG_PROC
-#endif
 
 
 /*
+ * Typing short-hands
+ */
+typedef struct nfs_fattr	fattr;
+typedef struct qstr		qstr;
+
+#define NFSDBG_FACILITY		NFSDBG_PROC
+
+
+/*
+ * Bare-bones access to getattr: this is for nfs_read_super.
+ */
+static int
+nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+			fattr *fattr)
+{
+	int		status;
+
+	dprintk("NFS call  getroot\n");
+	fattr->valid = 0;
+	status = rpc_call(server->client, NFSPROC_GETATTR, fhandle, fattr, 0);
+	dprintk("NFS reply getroot\n");
+	return status;
+}
+
+/*
  * One function for each procedure in the NFS protocol.
  */
-int
-nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
-			struct nfs_fattr *fattr)
+static int
+nfs_proc_getattr(struct dentry *dentry, fattr *fattr)
 {
 	int	status;
 
 	dprintk("NFS call  getattr\n");
-	status = rpc_call(server->client, NFSPROC_GETATTR, fhandle, fattr, 0);
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dentry->d_inode), NFSPROC_GETATTR,
+				NFS_FH(dentry), fattr, 0);
 	dprintk("NFS reply getattr\n");
 	return status;
 }
 
-int
-nfs_proc_setattr(struct nfs_server *server, struct nfs_fh *fhandle,
-			struct nfs_sattr *sattr, struct nfs_fattr *fattr)
+static int
+nfs_proc_setattr(struct dentry *dentry, fattr *fattr,
+				struct iattr *sattr)
 {
-	struct nfs_sattrargs	arg = { fhandle, sattr };
+	struct nfs_sattrargs	arg = { NFS_FH(dentry), sattr };
 	int	status;
 
 	dprintk("NFS call  setattr\n");
-	status = rpc_call(server->client, NFSPROC_SETATTR, &arg, fattr, 0);
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dentry->d_inode), NFSPROC_SETATTR, &arg, fattr, 0);
 	dprintk("NFS reply setattr\n");
 	return status;
 }
 
-int
-nfs_proc_lookup(struct nfs_server *server, struct nfs_fh *dir, const char *name,
-		    struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+static int
+nfs_proc_lookup(struct dentry *dir, fattr *dir_attr, qstr *name,
+		struct nfs_fh *fhandle, fattr *fattr)
 {
-	struct nfs_diropargs	arg = { dir, name };
+	struct nfs_diropargs	arg = { NFS_FH(dir), name->name, name->len };
 	struct nfs_diropok	res = { fhandle, fattr };
 	int			status;
 
-	dprintk("NFS call  lookup %s\n", name);
-	status = rpc_call(server->client, NFSPROC_LOOKUP, &arg, &res, 0);
+	dprintk("NFS call  lookup %s\n", name->name);
+	dir_attr->valid = 0;
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dir->d_inode), NFSPROC_LOOKUP, &arg, &res, 0);
 	dprintk("NFS reply lookup: %d\n", status);
 	return status;
 }
 
-int
-nfs_proc_readlink(struct nfs_server *server, struct nfs_fh *fhandle,
-			void **p0, char **string, unsigned int *len,
-			unsigned int maxlen)
+static int
+nfs_proc_readlink(struct dentry *dentry, fattr *fattr,
+			void *buffer, unsigned int bufsiz)
 {
-	struct nfs_readlinkres	res = { string, len, maxlen, NULL };
+	struct nfs_readlinkargs	args = { NFS_FH(dentry), buffer, bufsiz };
+	struct nfs_readlinkres	res = { buffer, bufsiz };
 	int			status;
 
 	dprintk("NFS call  readlink\n");
-	status = rpc_call(server->client, NFSPROC_READLINK, fhandle, &res, 0);
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dentry->d_inode), NFSPROC_READLINK,
+					&args, &res, 0);
 	dprintk("NFS reply readlink: %d\n", status);
-	if (!status)
-		*p0 = res.buffer;
-	else if (res.buffer)
-		kfree(res.buffer);
 	return status;
 }
 
-int
-nfs_proc_read(struct nfs_server *server, struct nfs_fh *fhandle, int swap,
-			  unsigned long offset, unsigned int count,
-			  void *buffer, struct nfs_fattr *fattr)
-{
-	struct nfs_readargs	arg = { fhandle, offset, count, buffer };
-	struct nfs_readres	res = { fattr, count };
+static int
+nfs_proc_read(struct dentry *dentry, fattr *fattr,
+	      struct rpc_cred *cred, int flags,
+	      unsigned long offset, unsigned int count,
+	      void *buffer, int *eofp)
+{
+	struct nfs_readargs	arg = { NFS_FH(dentry), offset, count, 1,
+				       {{ buffer, count }, {0,0}, {0,0}, {0,0},
+					{0,0}, {0,0}, {0,0}, {0,0}} };
+	struct nfs_readres	res = { fattr, count, 0};
+	struct rpc_message	msg = { NFSPROC_READ, &arg, &res, cred };
 	int			status;
 
 	dprintk("NFS call  read %d @ %ld\n", count, offset);
-	status = rpc_call(server->client, NFSPROC_READ, &arg, &res,
-			swap? NFS_RPC_SWAPFLAGS : 0);
+	fattr->valid = 0;
+	status = rpc_call_sync(NFS_CLIENT(dentry->d_inode), &msg, flags);
+
 	dprintk("NFS reply read: %d\n", status);
+	*eofp = res.eof;
 	return status;
 }
 
-int
-nfs_proc_write(struct nfs_server *server, struct nfs_fh *fhandle, int swap,
-			unsigned long offset, unsigned int count,
-			const void *buffer, struct nfs_fattr *fattr)
-{
-	struct nfs_writeargs	arg = { fhandle, offset, count, buffer };
-	int			status;
+static int
+nfs_proc_write(struct dentry *dentry, fattr *fattr,
+	       struct rpc_cred *cred, int how,
+	       unsigned long offset, unsigned int count,
+	       void *buffer, struct nfs_writeverf *verf)
+{
+	struct nfs_writeargs	arg = {NFS_FH(dentry), offset, count, 1, 1,
+					{{buffer, count}, {0,0}, {0,0}, {0,0},
+					 {0,0}, {0,0}, {0,0}, {0,0}}};
+	struct nfs_writeres     res = {fattr, verf, count};
+	struct rpc_message	msg = { NFSPROC_WRITE, &arg, &res, cred };
+	int			status, flags = 0;
 
 	dprintk("NFS call  write %d @ %ld\n", count, offset);
-	status = rpc_call(server->client, NFSPROC_WRITE, &arg, fattr,
-			swap? (RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS) : 0);
-	dprintk("NFS reply read: %d\n", status);
+	fattr->valid = 0;
+	if (how & NFS_RW_SWAP)
+		flags |= NFS_RPC_SWAPFLAGS;
+	status = rpc_call_sync(NFS_CLIENT(dentry->d_inode), &msg, flags);
+
+	dprintk("NFS reply write: %d\n", status);
+	verf->committed = NFS_FILE_SYNC;      /* NFSv2 always syncs data */
 	return status < 0? status : count;
 }
 
-int
-nfs_proc_create(struct nfs_server *server, struct nfs_fh *dir,
-			const char *name, struct nfs_sattr *sattr,
-			struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+static int
+nfs_proc_create(struct dentry *dir, fattr *dir_attr,
+		qstr *name, struct iattr *sattr, int flags,
+		struct nfs_fh *fhandle, fattr *fattr)
 {
-	struct nfs_createargs	arg = { dir, name, sattr };
+	struct nfs_createargs	arg = { NFS_FH(dir), name->name,
+					name->len, sattr };
 	struct nfs_diropok	res = { fhandle, fattr };
 	int			status;
 
-	dprintk("NFS call  create %s\n", name);
-	status = rpc_call(server->client, NFSPROC_CREATE, &arg, &res, 0);
+	dir_attr->valid = 0;
+	fattr->valid = 0;
+	dprintk("NFS call  create %s\n", name->name);
+	status = rpc_call(NFS_CLIENT(dir->d_inode), NFSPROC_CREATE, &arg, &res, 0);
 	dprintk("NFS reply create: %d\n", status);
 	return status;
 }
 
-int
-nfs_proc_remove(struct nfs_server *server, struct nfs_fh *dir, const char *name)
+/* In NFSv2, mknod is grafted onto the create call: a device number is
+ * passed in the size attribute, and a FIFO is first requested as S_IFCHR
+ * and retried with its real mode if the server answers -EINVAL. */
+static int
+nfs_proc_mknod(struct dentry *dir, fattr *dir_attr,
+			qstr *name, struct iattr *sattr, dev_t rdev,
+			struct nfs_fh *fhandle, fattr *fattr)
 {
-	struct nfs_diropargs	arg = { dir, name };
-	int			status;
+	struct nfs_createargs	arg = { NFS_FH(dir), name->name,
+						     name->len, sattr };
+	struct nfs_diropok	res = { fhandle, fattr };
+	int			status, mode;
+
+	dprintk("NFS call  mknod %s\n", name->name);
+
+	mode = sattr->ia_mode;	/* keep the caller's mode for the FIFO retry below */
+	if (S_ISFIFO(mode)) {
+		sattr->ia_mode = (mode & ~S_IFMT) | S_IFCHR;
+		sattr->ia_valid &= ~ATTR_SIZE;
+	} else if (S_ISCHR(mode) || S_ISBLK(mode)) {	/* test the file type in mode, not the device number */
+		sattr->ia_valid |= ATTR_SIZE;
+		sattr->ia_size   = rdev;	/* get out your barf bag */
+	}
+
+	dir_attr->valid = 0;
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dir->d_inode), NFSPROC_CREATE, &arg, &res, 0);
+
+	if (status == -EINVAL && S_ISFIFO(mode)) {
+		sattr->ia_mode = mode;	/* second attempt with the unmodified FIFO mode */
+		dir_attr->valid = 0;
+		fattr->valid = 0;
+		status = rpc_call(NFS_CLIENT(dir->d_inode), NFSPROC_CREATE, &arg, &res, 0);
+	}
+	dprintk("NFS reply mknod: %d\n", status);
+	return status;
+}
+  
+static int
+nfs_proc_remove(struct dentry *dir, fattr *dir_attr,
+		qstr *name, struct rpc_cred *cred)
+{
+	struct nfs_diropargs	arg = { NFS_FH(dir), name->name, name->len };
+	struct rpc_message	msg = { NFSPROC_REMOVE, &arg, NULL, cred };
+	int			status;
+
+	dir_attr->valid = 0;
+	dprintk("NFS call  remove %s\n", name->name);
+	status = rpc_call_sync(NFS_CLIENT(dir->d_inode), &msg, 0);
 
-	dprintk("NFS call  remove %s\n", name);
-	status = rpc_call(server->client, NFSPROC_REMOVE, &arg, NULL, 0);
 	dprintk("NFS reply remove: %d\n", status);
 	return status;
 }
 
-int
-nfs_proc_rename(struct nfs_server *server,
-		struct nfs_fh *old_dir, const char *old_name,
-		struct nfs_fh *new_dir, const char *new_name)
-{
-	struct nfs_renameargs	arg = { old_dir, old_name, new_dir, new_name };
+static int
+nfs_proc_rename(struct dentry *old_dir, fattr *old_attr, qstr *old_name,
+		struct dentry *new_dir, fattr *new_attr, qstr *new_name)
+{
+	struct nfs_renameargs	arg = { NFS_FH(old_dir), old_name->name,
+					old_name->len,
+					NFS_FH(new_dir), new_name->name,
+					new_name->len};
 	int			status;
 
-	dprintk("NFS call  rename %s -> %s\n", old_name, new_name);
-	status = rpc_call(server->client, NFSPROC_RENAME, &arg, NULL, 0);
+	dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
+	old_attr->valid = 0;
+	new_attr->valid = 0;
+	status = rpc_call(NFS_CLIENT(old_dir->d_inode), NFSPROC_RENAME, &arg, NULL, 0);
 	dprintk("NFS reply rename: %d\n", status);
 	return status;
 }
 
-int
-nfs_proc_link(struct nfs_server *server, struct nfs_fh *fhandle,
-			struct nfs_fh *dir, const char *name)
-{
-	struct nfs_linkargs	arg = { fhandle, dir, name };
+static int
+nfs_proc_link(struct dentry *dentry, fattr *attr,
+	      struct dentry *dir, fattr *dir_attr, qstr *name)
+{
+	struct nfs_linkargs	arg = { NFS_FH(dentry), NFS_FH(dir),
+					name->name, name->len };
 	int			status;
 
-	dprintk("NFS call  link %s\n", name);
-	status = rpc_call(server->client, NFSPROC_LINK, &arg, NULL, 0);
+	dprintk("NFS call  link %s\n", name->name);
+	dir_attr->valid = 0;
+	attr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dentry->d_inode), NFSPROC_LINK, &arg, NULL, 0);
 	dprintk("NFS reply link: %d\n", status);
 	return status;
 }
 
-int
-nfs_proc_symlink(struct nfs_server *server, struct nfs_fh *dir,
-			const char *name, const char *path,
-			struct nfs_sattr *sattr)
-{
-	struct nfs_symlinkargs	arg = { dir, name, path, sattr };
+static int
+nfs_proc_symlink(struct dentry *dir, fattr *dir_attr, qstr *name,
+		 qstr *path, struct iattr *sattr,
+		 struct nfs_fh *sym_fh, fattr *sym_attr)
+{
+	struct nfs_symlinkargs	arg = { NFS_FH(dir), name->name, name->len,
+					path->name, path->len, sattr };
 	int			status;
 
-	dprintk("NFS call  symlink %s -> %s\n", name, path);
-	status = rpc_call(server->client, NFSPROC_SYMLINK, &arg, NULL, 0);
+	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
+	dir_attr->valid = 0;
+	sym_attr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dir->d_inode), NFSPROC_SYMLINK, &arg, NULL, 0);
 	dprintk("NFS reply symlink: %d\n", status);
 	return status;
 }
 
-int
-nfs_proc_mkdir(struct nfs_server *server, struct nfs_fh *dir,
-			const char *name, struct nfs_sattr *sattr,
-			struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+static int
+nfs_proc_mkdir(struct dentry *dir, fattr *dir_attr, qstr *name,
+	       struct iattr *sattr,
+	       struct nfs_fh *fhandle, fattr *fattr)
 {
-	struct nfs_createargs	arg = { dir, name, sattr };
+	struct nfs_createargs	arg = { NFS_FH(dir), name->name, name->len,
+					sattr };
 	struct nfs_diropok	res = { fhandle, fattr };
 	int			status;
 
-	dprintk("NFS call  mkdir %s\n", name);
-	status = rpc_call(server->client, NFSPROC_MKDIR, &arg, &res, 0);
+	dprintk("NFS call  mkdir %s\n", name->name);
+	dir_attr->valid = 0;
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dir->d_inode), NFSPROC_MKDIR, &arg, &res, 0);
 	dprintk("NFS reply mkdir: %d\n", status);
 	return status;
 }
 
-int
-nfs_proc_rmdir(struct nfs_server *server, struct nfs_fh *dir, const char *name)
+static int
+nfs_proc_rmdir(struct dentry *dir, fattr *dir_attr, qstr *name)
 {
-	struct nfs_diropargs	arg = { dir, name };
+	struct nfs_diropargs	arg = { NFS_FH(dir), name->name, name->len };
 	int			status;
 
-	dprintk("NFS call  rmdir %s\n", name);
-	status = rpc_call(server->client, NFSPROC_RMDIR, &arg, NULL, 0);
+	dprintk("NFS call  rmdir %s\n", name->name);
+	dir_attr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dir->d_inode), NFSPROC_RMDIR, &arg, NULL, 0);
 	dprintk("NFS reply rmdir: %d\n", status);
 	return status;
 }
@@ -237,66 +338,73 @@
 /*
  * The READDIR implementation is somewhat hackish - we pass a temporary
  * buffer to the encode function, which installs it in the receive
- * iovec. The dirent buffer itself is passed in the result struct.
+ * iovec. The decode function just parses the reply to make
+ * sure it is syntactically correct; the entries themselves are decoded
+ * from nfs_readdir by calling the decode_entry function directly.
  */
-int
-nfs_proc_readdir(struct nfs_server *server, struct nfs_fh *fhandle,
-			u32 cookie, unsigned int size, __u32 *entry)
+static int
+nfs_proc_readdir(struct dentry *dir, fattr *dir_attr,
+		 struct rpc_cred *cred,
+		 __u64 cookie, void *entry, unsigned int size, int plus)
 {
 	struct nfs_readdirargs	arg;
 	struct nfs_readdirres	res;
-	void *			buffer;
-	unsigned int		buf_size = PAGE_SIZE;
+	struct rpc_message	msg = { NFSPROC_READDIR, &arg, &res, cred };
+	struct nfs_server       *server = NFS_DSERVER(dir);
 	int			status;
 
-	/* First get a temp buffer for the readdir reply */
-	/* N.B. does this really need to be cleared? */
-	status = -ENOMEM;
-	buffer = (void *) get_free_page(GFP_KERNEL);
-	if (!buffer)
-		goto out;
-
-	/*
-	 * Calculate the effective size the buffer.  To make sure
-	 * that the returned data will fit into the user's buffer,
-	 * we decrease the buffer size as necessary.
-	 *
-	 * Note: NFS returns three __u32 values for each entry,
-	 * and we assume that the data is packed into the user
-	 * buffer with the same efficiency. 
-	 */
-	if (size < buf_size)
-		buf_size = size;
-	if (server->rsize < buf_size)
-		buf_size = server->rsize;
-#if 0
-printk("nfs_proc_readdir: user size=%d, rsize=%d, buf_size=%d\n",
-size, server->rsize, buf_size);
-#endif
+	if (server->rsize < size)
+		size = server->rsize;
 
-	arg.fh = fhandle;
+	dir_attr->valid = 0;
+	arg.fh = NFS_FH(dir);
 	arg.cookie = cookie;
-	arg.buffer = buffer;
-	arg.bufsiz = buf_size;
+	arg.buffer = entry;
+	arg.bufsiz = size;
 	res.buffer = entry;
 	res.bufsiz = size;
 
-	dprintk("NFS call  readdir %d\n", cookie);
-	status = rpc_call(server->client, NFSPROC_READDIR, &arg, &res, 0);
+	dir_attr->valid = 0;
+	dprintk("NFS call  readdir %d\n", (unsigned int)cookie);
+	status = rpc_call_sync(NFS_CLIENT(dir->d_inode), &msg, 0);
+
 	dprintk("NFS reply readdir: %d\n", status);
-	free_page((unsigned long) buffer);
-out:
 	return status;
 }
 
-int
+static int
 nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
 			struct nfs_fsinfo *info)
 {
 	int	status;
 
 	dprintk("NFS call  statfs\n");
+	memset((char *)info, 0, sizeof(*info));
 	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, info, 0);
 	dprintk("NFS reply statfs: %d\n", status);
 	return status;
 }
+
+struct nfs_rpc_ops     nfs_v2_clientops = {
+       2,		       /* protocol version */
+       nfs_proc_get_root,
+       nfs_proc_getattr,
+       nfs_proc_setattr,
+       nfs_proc_lookup,
+       NULL,		       /* access */
+       nfs_proc_readlink,
+       nfs_proc_read,
+       nfs_proc_write,
+       NULL,		       /* commit */
+       nfs_proc_create,
+       nfs_proc_remove,
+       nfs_proc_rename,
+       nfs_proc_link,
+       nfs_proc_symlink,
+       nfs_proc_mkdir,
+       nfs_proc_rmdir,
+       nfs_proc_readdir,
+       nfs_proc_mknod,
+       nfs_proc_statfs,
+       nfs_decode_dirent,
+};
--- linux/fs/nfs/read.c.nfsattack-gafton	Mon Jun  7 16:25:57 1999
+++ linux/fs/nfs/read.c	Fri Feb  4 23:26:34 2000
@@ -15,7 +15,7 @@
  * within the RPC code when root squashing is suspected.
  */
 
-#define NFS_NEED_XDR_TYPES
+#define NFS_NEED_NFS2_XDR_TYPES
 #include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
@@ -24,7 +24,11 @@
 #include <linux/mm.h>
 #include <linux/malloc.h>
 #include <linux/pagemap.h>
+#include <linux/file.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs3.h>
 #include <linux/nfs_fs.h>
 
 #include <asm/segment.h>
@@ -33,8 +37,9 @@
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
 struct nfs_rreq {
-	struct inode *		ra_inode;	/* inode from which to read */
-	struct page *		ra_page;	/* page to be read */
+	struct inode		*ra_inode;	/* file to be read */
+	int			ra_npages;	/* number of pages to read */
+	struct page		*ra_page[NFS_READ_MAXIOV]; /* pages to read */
 	struct nfs_readargs	ra_args;	/* XDR argument struct */
 	struct nfs_readres	ra_res;		/* ... and result struct */
 	struct nfs_fattr	ra_fattr;	/* fattr storage */
@@ -50,15 +55,27 @@
  * Set up the NFS read request struct
  */
 static inline void
-nfs_readreq_setup(struct nfs_rreq *req, struct nfs_fh *fh,
-		  unsigned long offset, void *buffer, unsigned int rsize)
+nfs_readreq_setup(struct nfs_rreq *req, struct inode *inode,
+		  struct nfs_fh *fh,
+		  struct page **pages, int nr)
 {
+	int i;
+
+	req->ra_inode	    = inode;
 	req->ra_args.fh     = fh;
-	req->ra_args.offset = offset;
-	req->ra_args.count  = rsize;
-	req->ra_args.buffer = buffer;
+	req->ra_args.offset = (*pages)->offset;
+	req->ra_args.count  = nr*PAGE_CACHE_SIZE;
+	req->ra_args.nriov  = nr;
+	req->ra_npages	    = nr;
+	for (i = 0; i < nr; i++) {
+		req->ra_args.iov[i].iov_base = (void *)page_address(pages[i]);
+		req->ra_args.iov[i].iov_len = PAGE_CACHE_SIZE;
+		req->ra_page[i] = pages[i];
+	}
+	req->ra_fattr.valid = 0;
 	req->ra_res.fattr   = &req->ra_fattr;
-	req->ra_res.count   = rsize;
+	req->ra_res.count   = nr*PAGE_CACHE_SIZE;
+	req->ra_res.eof     = 0;
 }
 
 
@@ -66,32 +83,34 @@
  * Read a page synchronously.
  */
 static int
-nfs_readpage_sync(struct dentry *dentry, struct inode *inode, struct page *page)
+nfs_readpage_sync(struct dentry *dentry, struct inode *inode,
+		  struct rpc_cred *cred, struct page *page)
 {
-	struct nfs_rreq	rqst;
+	struct nfs_fattr fattr;
 	unsigned long	offset = page->offset;
 	char		*buffer = (char *) page_address(page);
 	int		rsize = NFS_SERVER(inode)->rsize;
 	int		result, refresh = 0;
-	int		count = PAGE_SIZE;
-	int		flags = IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0;
+	int		count = PAGE_CACHE_SIZE, chunk, eof = 0;
+	int		flags = 0;
+
+	if (IS_SWAPFILE(inode))
+		flags |= NFS_RPC_SWAPFLAGS;
 
 	dprintk("NFS: nfs_readpage_sync(%p)\n", page);
 	clear_bit(PG_error, &page->flags);
 
 	do {
-		if (count < rsize)
-			rsize = count;
+		if ((chunk = rsize) > count)
+			chunk = count;
 
 		dprintk("NFS: nfs_proc_read(%s, (%s/%s), %ld, %d, %p)\n",
 			NFS_SERVER(inode)->hostname,
 			dentry->d_parent->d_name.name, dentry->d_name.name,
-			offset, rsize, buffer);
+			offset, chunk, buffer);
 
-		/* Set up arguments and perform rpc call */
-		nfs_readreq_setup(&rqst, NFS_FH(dentry), offset, buffer, rsize);
-		result = rpc_call(NFS_CLIENT(inode), NFSPROC_READ,
-					&rqst.ra_args, &rqst.ra_res, flags);
+		result = NFS_CALL(read, inode, (dentry, &fattr, cred, flags,
+						offset, chunk, buffer, &eof));
 
 		/*
 		 * Even if we had a partial success we can't mark the page
@@ -106,7 +125,7 @@
 		count  -= result;
 		offset += result;
 		buffer += result;
-		if (result < rsize)	/* NFSv2ism */
+		if (eof)
 			break;
 	} while (count);
 
@@ -117,13 +136,80 @@
 io_error:
 	/* Note: we don't refresh if the call returned error */
 	if (refresh && result >= 0)
-		nfs_refresh_inode(inode, &rqst.ra_fattr);
-	/* N.B. Use nfs_unlock_page here? */
-	clear_bit(PG_locked, &page->flags);
-	wake_up(&page->wait);
+		nfs_refresh_inode(inode, &fattr);
 	return result;
 }
 
+struct page *
+_nfs_find_one_page(struct inode *inode, unsigned long offset, struct page **new_page)
+{
+	struct page	*page, **hash;
+
+	hash = page_hash(inode, offset);
+	page = __find_page(inode, offset, *hash);
+	if (!page) {
+		if (!*new_page)
+			return NULL;
+		page = *new_page;
+		add_to_page_cache(page, inode, offset, hash);
+		*new_page = NULL;
+	}
+	return page;
+}
+
+struct page *
+nfs_find_one_page(struct inode *inode, unsigned long offset)
+{
+	unsigned long	new_addr = page_cache_alloc();
+	struct page	*page, *new_page;
+
+	offset &= PAGE_CACHE_MASK;
+	if (new_addr)
+		new_page = page_cache_entry(new_addr);
+	else
+		new_page = NULL;
+	page = _nfs_find_one_page(inode, offset, &new_page);
+	if (new_page)
+		page_cache_release(new_page);
+	return page;
+}
+
+/*
+ * FIXME: Allocating rsize/PAGE_CACHE_SIZE pages and handing them down
+ *        to the nfs_read routine really should be done by
+ *	  generic_file_read.
+ */
+static int
+nfs_find_pages(struct inode *inode, struct page **pages,
+	       unsigned long offset, int nr)
+{
+	struct page	*page, *new_page = NULL;
+	int		count = 0;
+
+	offset &= PAGE_CACHE_MASK;
+	for (; nr; offset += PAGE_CACHE_SIZE, nr--) {
+		if (offset >= inode->i_size)
+			break;
+		if (!new_page) {
+			unsigned long new_addr = page_cache_alloc();
+			if (new_addr)
+				new_page = page_cache_entry(new_addr);
+		}
+		page = _nfs_find_one_page(inode, offset, &new_page);
+		if (!page)
+			break;
+		if (PageUptodate(page) || PageError(page)
+		    || test_and_set_bit(PG_locked, &page->flags)) {
+			page_cache_release(page);
+			break;
+		}
+		pages[count++] = page;
+	}
+	if (new_page)
+		page_cache_release(new_page);
+	return count;
+}
+
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
@@ -132,66 +218,108 @@
 nfs_readpage_result(struct rpc_task *task)
 {
 	struct nfs_rreq	*req = (struct nfs_rreq *) task->tk_calldata;
-	struct page	*page = req->ra_page;
-	unsigned long	address = page_address(page);
-	int		result = task->tk_status;
-	static int	succ = 0, fail = 0;
+	struct inode	*inode = req->ra_inode;
+	struct page	**pages = req->ra_page;
+	int		i, result = task->tk_status;
+	int		count, npages = req->ra_npages;
 
-	dprintk("NFS: %4d received callback for page %lx, result %d\n",
-			task->tk_pid, address, result);
+	dprintk("NFS: %4d received callback for %d pages %lx, result %d\n",
+			task->tk_pid, npages, page_address(*pages), result);
 
+	i = 0;
 	if (result >= 0) {
-		result = req->ra_res.count;
-		if (result < PAGE_SIZE) {
-			memset((char *) address + result, 0, PAGE_SIZE - result);
+		count = req->ra_res.count;
+		/* An NFSv3 server may return less data than requested
+		 * and have the client issue another request for the
+		 * remaining data, but only if the client has set an
+		 * rsize that is too large.
+		 */
+		if (!req->ra_res.eof && result < npages*PAGE_CACHE_SIZE) {
+			printk(KERN_WARNING
+				"NFS: server sends short reads. "
+				"Expected %lu, got %u. "
+				"Try reducing the value of rsize.\n",
+				npages*PAGE_CACHE_SIZE, result);
+		}
+
+		/*
+		 * Refresh before we set PG_uptodate in case of invalidation
+		 */
+		nfs_refresh_inode(inode, &req->ra_fattr);
+		for (; i < npages && count > 0; i++, count -= PAGE_CACHE_SIZE) {
+			struct page *page = pages[i];
+			if (count < PAGE_CACHE_SIZE)
+				memset((char *) page_address(page) + count, 0,
+				       PAGE_CACHE_SIZE - count);
+			set_bit(PG_uptodate, &page->flags);
 		}
-		nfs_refresh_inode(req->ra_inode, &req->ra_fattr);
-		set_bit(PG_uptodate, &page->flags);
-		succ++;
-	} else {
-		set_bit(PG_error, &page->flags);
-		fail++;
-		dprintk("NFS: %d successful reads, %d failures\n", succ, fail);
 	}
-	/* N.B. Use nfs_unlock_page here? */
-	clear_bit(PG_locked, &page->flags);
-	wake_up(&page->wait);
+	for (; i < npages; i++)
+		set_bit(PG_error, &pages[i]->flags);
 
-	free_page(address);
+	/* N.B. Use nfs_unlock_page here? */
+	for (i = 0; i < npages; i++) {
+		clear_bit(PG_locked, &pages[i]->flags);
+		wake_up(&pages[i]->wait);
+		page_cache_release(pages[i]);
+	}
 
-	rpc_release_task(task);
-	kfree(req);
+	rpc_free(req);
 }
 
 static inline int
-nfs_readpage_async(struct dentry *dentry, struct inode *inode,
-			struct page *page)
+nfs_readpage_async(struct file *filp, struct page *page)
 {
-	unsigned long address = page_address(page);
+	struct dentry	*dentry = filp->f_dentry;
+	struct inode	*inode = dentry->d_inode;
+	struct rpc_message msg;
 	struct nfs_rreq	*req;
-	int		result = -1, flags;
+	struct page	*pages[NFS_READ_MAXIOV];
+	int		result = -ENOMEM,
+			i, flags, rpages;
 
 	dprintk("NFS: nfs_readpage_async(%p)\n", page);
-	if (NFS_CONGESTED(inode))
-		goto out_defer;
 
 	/* N.B. Do we need to test? Never called for swapfile inode */
-	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+	flags = RPC_TASK_ASYNC;
+
+	if (IS_SWAPFILE(inode))
+		flags |= NFS_RPC_SWAPFLAGS;
+
 	req = (struct nfs_rreq *) rpc_allocate(flags, sizeof(*req));
 	if (!req)
 		goto out_defer;
 
+	pages[0] = page;
+	rpages = 1;
+	if (NFS_SERVER(inode)->rpages > 1) {
+		int npages = NFS_SERVER(inode)->rpages - 1;
+		if (npages > NFS_READ_MAXIOV - 1)
+			npages = NFS_READ_MAXIOV - 1;
+
+		rpages += nfs_find_pages(inode, pages+1,
+					 page->offset+PAGE_CACHE_SIZE, npages);
+	}
+
 	/* Initialize request */
 	/* N.B. Will the dentry remain valid for life of request? */
-	nfs_readreq_setup(req, NFS_FH(dentry), page->offset,
-				(void *) address, PAGE_SIZE);
-	req->ra_inode = inode;
-	req->ra_page = page; /* count has been incremented by caller */
+	nfs_readreq_setup(req, inode, NFS_FH(dentry), pages, rpages);
 
 	/* Start the async call */
 	dprintk("NFS: executing async READ request.\n");
-	result = rpc_do_call(NFS_CLIENT(inode), NFSPROC_READ,
-				&req->ra_args, &req->ra_res, flags,
+
+	if (NFS_CONGESTED(inode)) {
+		result = nfs_wait_on_congest(inode);
+		if (result < 0)
+			goto out_free;
+	}
+
+	msg.proc = (NFS_PROTO(inode)->version == 3) ? NFS3PROC_READ : NFSPROC_READ;
+	msg.arg = &req->ra_args;
+	msg.res = &req->ra_res;
+	msg.cred = nfs_file_cred(filp);
+
+	result = rpc_call_async(NFS_CLIENT(inode), &msg, flags,
 				nfs_readpage_result, req);
 	if (result < 0)
 		goto out_free;
@@ -204,7 +332,13 @@
 	goto out;
 out_free:
 	dprintk("NFS: failed to enqueue async READ request.\n");
-	kfree(req);
+	for(i = 1; i < rpages; i++) {
+		set_bit(PG_error, &pages[i]->flags);
+		clear_bit(PG_locked, &pages[i]->flags);
+		wake_up(&pages[i]->wait);
+		page_cache_release(pages[i]);
+	}
+	rpc_free(req);
 	goto out;
 }
 
@@ -213,7 +347,7 @@
  * We read the page synchronously in the following cases:
  *  -	The file is a swap file. Swap-ins are always sync operations,
  *	so there's no need bothering to make async reads 100% fail-safe.
- *  -	The NFS rsize is smaller than PAGE_SIZE. We could kludge our way
+ *  -	The NFS rsize is smaller than PAGE_CACHE_SIZE. We could kludge our way
  *	around this by creating several consecutive read requests, but
  *	that's hardly worth it.
  *  -	The error flag is set for this page. This happens only when a
@@ -221,45 +355,50 @@
  *  -	The server is congested.
  */
 int
-nfs_readpage(struct file *file, struct page *page)
+nfs_readpage(struct file *filp, struct page *page)
 {
-	struct dentry *dentry = file->f_dentry;
+	struct dentry *dentry = filp->f_dentry;
 	struct inode *inode = dentry->d_inode;
-	int		error;
+	int		error = 0,
+			rsize = NFS_SERVER(inode)->rsize;
 
-	dprintk("NFS: nfs_readpage (%p %ld@%ld)\n",
-		page, PAGE_SIZE, page->offset);
 	atomic_inc(&page->count);
-	set_bit(PG_locked, &page->flags);
+
+	dprintk("NFS: nfs_readpage (%p %d@%ld)\n",
+		page, rsize, page->offset);
 
 	/*
-	 * Try to flush any pending writes to the file..
-	 *
-	 * NOTE! Because we own the page lock, there cannot
-	 * be any new pending writes generated at this point
-	 * for this page (other pages can be written to).
+	 * Try to flush any pending writes to the file
+	 * and grab the page lock
 	 */
-	error = nfs_wb_page(inode, page);
-	if (error)
-		goto out_error;
+	do {
+		error = nfs_sync_file(inode, 0, page->offset,
+				      page->offset + rsize, FLUSH_WAIT);
+		if (error < 0)
+			goto out_free;
+
+		error = nfs_wait_on_page(inode, page);
+		if (error < 0)
+			goto out_free;
+
+	} while (test_and_set_bit(PG_locked, &page->flags));
 
 	error = -1;
-	if (!IS_SWAPFILE(inode) && !PageError(page) &&
-	    NFS_SERVER(inode)->rsize >= PAGE_SIZE)
-		error = nfs_readpage_async(dentry, inode, page);
+	if (!IS_SWAPFILE(inode) && !PageError(page) && rsize >= PAGE_CACHE_SIZE)
+		error = nfs_readpage_async(filp, page);
+
 	if (error >= 0)
 		goto out;
 
-	error = nfs_readpage_sync(dentry, inode, page);
+	error = nfs_readpage_sync(dentry, inode, nfs_file_cred(filp), page);
 	if (error < 0 && IS_SWAPFILE(inode))
-		printk("Aiee.. nfs swap-in of page failed!\n");
-	goto out_free;
+		printk(KERN_ERR "Aiee.. nfs swap-in of page failed!\n");
 
-out_error:
+	/* N.B. Use nfs_unlock_page here? */
 	clear_bit(PG_locked, &page->flags);
 	wake_up(&page->wait);
 out_free:
-	free_page(page_address(page));
+	page_cache_release(page);
 out:
 	return error;
 }
--- linux/fs/nfs/symlink.c.nfsattack-gafton	Mon Apr 12 12:57:11 1999
+++ linux/fs/nfs/symlink.c	Fri Feb  4 23:26:34 2000
@@ -5,12 +5,18 @@
  *
  *  Optimization changes Copyright (C) 1994 Florian La Roche
  *
+ *  Jun 7 1999, cache symlink lookups in the page cache.  -DaveM
+ *
  *  nfs symlink handling code
  */
 
+#define NFS_NEED_XDR_TYPES
 #include <linux/sched.h>
 #include <linux/errno.h>
+#include <linux/sunrpc/clnt.h>
 #include <linux/nfs_fs.h>
+#include <linux/nfs.h>
+#include <linux/pagemap.h>
 #include <linux/stat.h>
 #include <linux/mm.h>
 #include <linux/malloc.h>
@@ -37,70 +43,123 @@
 	NULL,			/* rename */
 	nfs_readlink,		/* readlink */
 	nfs_follow_link,	/* follow_link */
+	NULL,			/* get_block */
 	NULL,			/* readpage */
 	NULL,			/* writepage */
-	NULL,			/* bmap */
+	NULL,			/* flushpage */
 	NULL,			/* truncate */
-	NULL			/* permission */
+	NULL,			/* permission */
+	NULL,			/* smap */
+	NULL			/* revalidate */
 };
 
+/* Symlink caching in the page cache is even more simplistic
+ * and straight-forward than readdir caching.
+ */
+static struct page *try_to_get_symlink_page(struct dentry *dentry, struct inode *inode)
+{
+	struct nfs_fattr fattr;
+	struct page *page;
+	void * buffer;
+	int error;	/* signed, so the error < 0 test below can fire */
+
+	page = nfs_find_lock_page(inode, 0);
+	if (!page)
+		goto out;
+
+	if (PageUptodate(page))
+		goto unlock_out;
+
+	/* We place the length at the beginning of the page,
+	 * in client byte order, followed by the string.
+	 */
+	buffer = (void *)page_address(page);
+	memset(buffer, 0, PAGE_CACHE_SIZE);
+	error = NFS_CALL(readlink, inode, (dentry, &fattr, buffer,
+					   PAGE_CACHE_SIZE-sizeof(u32)-4));
+	if (error < 0)
+		goto error;
+	nfs_refresh_inode(inode, &fattr);
+	if (PageError(page))
+		clear_bit(PG_error, &page->flags);
+	set_bit(PG_uptodate, &page->flags);
+
+unlock_out:
+	clear_bit(PG_locked, &page->flags);
+	wake_up(&page->wait);
+out:
+	return page;
+
+error:
+	set_bit(PG_error, &page->flags);
+	goto unlock_out;
+}
+
 static int nfs_readlink(struct dentry *dentry, char *buffer, int buflen)
 {
-	int error;
-	unsigned int len;
-	char *res;
-	void *mem;
-
-	dfprintk(VFS, "nfs: readlink(%s/%s)\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name);
-
-	error = nfs_proc_readlink(NFS_DSERVER(dentry), NFS_FH(dentry),
-					&mem, &res, &len, NFS_MAXPATHLEN);
-	if (! error) {
-		if (len > buflen)
-			len = buflen;
-		copy_to_user(buffer, res, len);
-		error = len;
-		kfree(mem);
-	}
-	return error;
+	struct inode *inode = dentry->d_inode;
+	struct page *page;
+	u32 *p, len;
+
+	/* Caller revalidated the directory inode already. */
+	page = find_page(inode, 0);
+	if (!page)
+		goto no_readlink_page;
+	if (!PageUptodate(page)) {
+		/* Drop the find_page() reference before re-fetching. */
+		page_cache_release(page);
+		goto no_readlink_page;
+	}
+success:
+	p = (u32 *) page_address(page);
+	len = *p++;
+	if (len > buflen)
+		len = buflen;
+	copy_to_user(buffer, p, len);
+	page_cache_release(page);
+	return len;
+
+no_readlink_page:
+	page = try_to_get_symlink_page(dentry, inode);
+	if (!page)
+		goto no_page;
+	if (PageUptodate(page))
+		goto success;
+	page_cache_release(page);
+no_page:
+	return -EIO;
 }
 
 static struct dentry *
-nfs_follow_link(struct dentry * dentry, struct dentry *base, unsigned int follow)
+nfs_follow_link(struct dentry *dentry, struct dentry *base, unsigned int follow)
 {
-	int error;
-	unsigned int len;
-	char *res;
-	void *mem;
-	char *path;
 	struct dentry *result;
-
-	dfprintk(VFS, "nfs: follow_link(%s/%s)\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name);
-
-	error = nfs_proc_readlink(NFS_DSERVER(dentry), NFS_FH(dentry),
-				 &mem, &res, &len, NFS_MAXPATHLEN);
-	result = ERR_PTR(error);
-	if (error)
-		goto out_dput;
-
-	result = ERR_PTR(-ENOMEM);
-	path = kmalloc(len + 1, GFP_KERNEL);
-	if (!path)
-		goto out_mem;
-	memcpy(path, res, len);
-	path[len] = 0;
-	kfree(mem);
-
-	result = lookup_dentry(path, base, follow);
-	kfree(path);
-out:
+	struct inode *inode = dentry->d_inode;
+	struct page *page;
+	u32 *p;
+
+	/* Caller revalidated the directory inode already. */
+	page = find_page(inode, 0);
+	if (!page)
+		goto no_followlink_page;
+	if (!PageUptodate(page))
+		goto followlink_read_error;
+success:
+	p = (u32 *) page_address(page);
+	result = lookup_dentry((char *) (p + 1), base, follow);
+	page_cache_release(page);
 	return result;
 
-out_mem:
-	kfree(mem);
-out_dput:
-	dput(base);
-	goto out;
+no_followlink_page:
+	page = try_to_get_symlink_page(dentry, inode);
+	if (!page)
+		goto no_page;
+	if (PageUptodate(page))
+		goto success;
+followlink_read_error:
+	page_cache_release(page);
+no_page:
+	/* base was not handed to lookup_dentry(), so release it here. */
+	dput(base);
+	return ERR_PTR(-EIO);
 }
--- linux/fs/nfs/write.c.nfsattack-gafton	Wed Mar  3 14:17:02 1999
+++ linux/fs/nfs/write.c	Fri Feb  4 23:26:34 2000
@@ -10,7 +10,7 @@
  * RPC call to write the page is scheduled immediately; otherwise, the call
  * is delayed for a few seconds.
  *
- * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE.
+ * Just like readahead, no async I/O is performed if wsize < PAGE_CACHE_SIZE.
  *
  * Write requests are kept on the inode's writeback list. Each entry in
  * that list references the page (portion) to be written. When the
@@ -44,54 +44,77 @@
  * buffer_heads with a b_ops-> field.
  *
  * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
+ *
+ * Some parts
+ * Copyright (C) 1999, Trond Myklebust <trond.myklebust@fys.uio.no>
  */
 
 #include <linux/types.h>
 #include <linux/malloc.h>
 #include <linux/swap.h>
 #include <linux/pagemap.h>
-#include <linux/file.h>
 
+#include <linux/sunrpc/auth.h>
 #include <linux/sunrpc/clnt.h>
+#include <asm/spinlock.h>
+#include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs3.h>
 #include <linux/nfs_fs.h>
+#include <linux/nfs_cluster.h>
 #include <asm/uaccess.h>
 
+
 #define NFS_PARANOIA 1
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
-static void			nfs_wback_begin(struct rpc_task *task);
-static void			nfs_wback_result(struct rpc_task *task);
-static void			nfs_cancel_request(struct nfs_wreq *req);
-
 /*
- * Cache parameters
+ * NFSv3 constants
  */
-#define NFS_WRITEBACK_DELAY	(10 * HZ)
-#define NFS_WRITEBACK_MAX	64
 
 /*
- * Limit number of delayed writes
+ * Local function declarations
  */
-static int			nr_write_requests = 0;
-static struct rpc_wait_queue	write_queue = RPC_INIT_WAITQ("write_chain");
+static void	nfs_writeback_done(struct rpc_task *);
+static void	nfs_write_rpcsetup(struct dentry*, struct inode *inode,
+				   struct nfs_write_data *data,
+                                   __u64 start, __u64 end);
+
+static void	nfs_commit_done(struct rpc_task *task);
+static void	nfs_write_release(struct rpc_task *task);
+static void	nfs_commit_release(struct rpc_task *task);
 
 /* Hack for future NFS swap support */
 #ifndef IS_SWAPFILE
 # define IS_SWAPFILE(inode)	(0)
 #endif
 
+static int
+nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr)
+{
+	if ((fattr->valid & NFS_ATTR_FATTR) && !(fattr->valid & NFS_ATTR_WCC)) {
+		fattr->pre_size  = NFS_CACHE_ISIZE(inode);
+		fattr->pre_mtime = NFS_CACHE_MTIME(inode);
+		fattr->pre_ctime = NFS_CACHE_CTIME(inode);
+		fattr->valid |= NFS_ATTR_WCC;
+	}
+	return nfs_refresh_inode(inode, fattr);
+}
+
 /*
  * Write a page synchronously.
  * Offset is the data offset within the page.
  */
 static int
 nfs_writepage_sync(struct dentry *dentry, struct inode *inode,
-		struct page *page, unsigned long offset, unsigned int count)
+		   struct rpc_cred *cred, struct page *page,
+		   unsigned long offset, unsigned int count)
 {
 	unsigned int	wsize = NFS_SERVER(inode)->wsize;
-	int		result, refresh = 0, written = 0;
+	int		result, refresh = 0, written = 0, flags;
 	u8		*buffer;
 	struct nfs_fattr fattr;
+	struct nfs_writeverf verifier;
 
 	dprintk("NFS:      nfs_writepage_sync(%s/%s %d@%ld)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -100,13 +123,15 @@
 	buffer = (u8 *) page_address(page) + offset;
 	offset += page->offset;
 
+	flags = ((IS_SWAPFILE(inode)) ? NFS_RW_SWAP : 0) | NFS_RW_SYNC;
+
 	do {
 		if (count < wsize && !IS_SWAPFILE(inode))
 			wsize = count;
 
-		result = nfs_proc_write(NFS_DSERVER(dentry), NFS_FH(dentry),
-					IS_SWAPFILE(inode), offset, wsize,
-					buffer, &fattr);
+		result = NFS_CALL(write, inode, (dentry, &fattr, cred, flags,
+						 offset, wsize, buffer,
+						 &verifier));
 
 		if (result < 0) {
 			/* Must mark the page invalid after I/O error */
@@ -114,7 +139,7 @@
 			goto io_error;
 		}
 		if (result != wsize)
-			printk("NFS: short write, wsize=%u, result=%d\n",
+			printk(KERN_ERR "NFS: short write, size=%u, result=%d\n",
 			wsize, result);
 		refresh = 1;
 		buffer  += wsize;
@@ -131,550 +156,632 @@
 
 io_error:
 	/* Note: we don't refresh if the call failed (fattr invalid) */
-	if (refresh && result >= 0) {
-		/* See comments in nfs_wback_result */
-		/* N.B. I don't think this is right -- sync writes in order */
-		if (fattr.size < inode->i_size)
-			fattr.size = inode->i_size;
-		if (fattr.mtime.seconds < inode->i_mtime)
-			printk("nfs_writepage_sync: prior time??\n");
-		/* Solaris 2.5 server seems to send garbled
-		 * fattrs occasionally */
-		if (inode->i_ino == fattr.fileid) {
-			/*
-			 * We expect the mtime value to change, and
-			 * don't want to invalidate the caches.
-			 */
-			inode->i_mtime = fattr.mtime.seconds;
-			nfs_refresh_inode(inode, &fattr);
-		} 
-		else
-			printk("nfs_writepage_sync: inode %ld, got %u?\n",
-				inode->i_ino, fattr.fileid);
-	}
+	if (refresh && result >= 0)
+		nfs_write_attributes(inode, &fattr);
 
+	nfs_unlock_page(page);
 	return written? written : result;
 }
 
 /*
- * Append a writeback request to a list
+ * Write a page to the server. This will be used for NFS swapping only
+ * (for now), and we currently do this synchronously only.
  */
-static inline void
-append_write_request(struct nfs_wreq **q, struct nfs_wreq *wreq)
+int
+nfs_writepage(struct file * filp, struct page *page)
 {
-	dprintk("NFS:      append_write_request(%p, %p)\n", q, wreq);
-	rpc_append_list(q, wreq);
+	struct dentry	*dentry = filp->f_dentry;
+	struct rpc_cred	*cred = nfs_file_cred(filp);
+	int		status;
+
+	status = nfs_writepage_sync(dentry, dentry->d_inode, cred, page,
+				    0, PAGE_CACHE_SIZE);
+	return status;
 }
 
 /*
- * Remove a writeback request from a list
+ * Update and possibly write a cached page of an NFS file.
+ *
+ * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
+ * things with a page scheduled for an RPC call (e.g. invalidate it).
  */
-static inline void
-remove_write_request(struct nfs_wreq **q, struct nfs_wreq *wreq)
+int
+nfs_updatepage(struct file *file, struct page *page, const char *buf, unsigned long offset, unsigned int count, int sync)
 {
-	dprintk("NFS:      remove_write_request(%p, %p)\n", q, wreq);
-	rpc_remove_list(q, wreq);
-}
+	struct dentry		*dentry = file->f_dentry;
+	struct inode		*inode = dentry->d_inode;
+	struct nfs_page		*req;
+	struct nfs_cluster	*cluster;
+	void	 		*dest;
+	int			status;
 
-/*
- * Find a non-busy write request for a given page to
- * try to combine with.
- */
-static inline struct nfs_wreq *
-find_write_request(struct inode *inode, struct page *page)
-{
-	pid_t pid = current->pid;
-	struct nfs_wreq	*head, *req;
+	dprintk("NFS:      nfs_updatepage(%s/%s %d@%ld, sync=%d)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		count, page->offset+offset, sync);
 
-	dprintk("NFS:      find_write_request(%x/%ld, %p)\n",
-				inode->i_dev, inode->i_ino, page);
-	if (!(req = head = NFS_WRITEBACK(inode)))
-		return NULL;
-	do {
-		/*
-		 * We can't combine with canceled requests or
-		 * requests that have already been started..
-		 */
-		if (req->wb_flags & (NFS_WRITE_CANCELLED | NFS_WRITE_INPROGRESS))
-			continue;
 
-		if (req->wb_page == page && req->wb_pid == pid)
-			return req;
+	while (test_and_set_bit(PG_locked, &page->flags)) {
+		status = nfs_wait_on_page(inode, page);
+		if (status < 0)
+			goto done;
+	}
+	/*
+	 * If wsize is smaller than page size, update and write
+	 * page synchronously.
+	 */
+	dest = (u8*)page_address(page) + offset;
+	if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE) {
+		if (dest != buf)
+			count -= copy_from_user(dest, buf, count);
+		if (!count) {
+			status = -EFAULT;
+			clear_bit(PG_uptodate, &page->flags);
+			goto out_free;
+		}
+		status = nfs_writepage_sync(dentry, inode, nfs_file_cred(file),
+					    page, offset, count);
+		return status;
+	}
 
-		/*
-		 * Ehh, don't keep too many tasks queued..
-		 */
-		rpc_wake_up_task(&req->wb_task);
+        /*
+         * Try to find an NFS request corresponding to this page
+         * and update it.
+         * If there is one, we can be sure it is not yet being processed
+         * because *we* hold the lock on the page.
+         * If the request was not created by the calling process (or the
+         * process was running under a different uid when creating it),
+         * we must flush it out now. Lend it the page lock and perform
+         * a synchronous RPC WRITE call.
+         */
+again:
+	if (NFS_CONGESTED(inode)) {
+		status = nfs_wait_on_congest(inode);
+		if (status < 0)
+			goto done;
+	}
 
-	} while ((req = WB_NEXT(req)) != head);
-	return NULL;
-}
+        status = nfs_update_request(file, page, offset, count, &req);
+        if (status == 0) {
+		status = nfs_flush_wback(req, FLUSH_SYNC|FLUSH_STABLE);
+                if (status >= 0)
+                        goto again;
+                /* XXX: when the server returned an error, check that it is
+                 * propagated to the proper process (it should be).
+                 */
+        }
+        if (status < 0) {
+                /* Error when allocating cluster or request, or when
+                 * copying user data.
+                 */
+                goto out_free;
+        }
+
+	if (dest != buf)
+		count -= copy_from_user(dest, buf, count);
+	if (!count) {
+		status = -EFAULT;
+		clear_bit(PG_uptodate, &page->flags);
+		goto out_free;
+	}
 
-/*
- * Find and release all failed requests for this inode.
- */
-int
-nfs_check_failed_request(struct inode * inode)
-{
-	/* FIXME! */
-	return 0;
-}
 
-/*
- * Try to merge adjacent write requests. This works only for requests
- * issued by the same user.
- */
-static inline int
-update_write_request(struct nfs_wreq *req, unsigned int first,
-			unsigned int bytes)
-{
-	unsigned int	rqfirst = req->wb_offset,
-			rqlast = rqfirst + req->wb_bytes,
-			last = first + bytes;
+	if (req->end >= req->start + PAGE_CACHE_SIZE)
+		set_bit(PG_uptodate, &page->flags);
 
-	dprintk("nfs:      trying to update write request %p\n", req);
+        cluster = req->cluster;
+        status = count;         /* unless we detect an error */
 
-	/* not contiguous? */
-	if (rqlast < first || last < rqfirst)
-		return 0;
+        /* If the user requested a sync write, do it now */
+        if (sync) {
+                int     error;
+
+                error = nfs_flush_wback(req, FLUSH_SYNC|FLUSH_STABLE);
+                if (error < 0 || (error = file->f_error) < 0)
+                        status = error;
+		file->f_error = 0;
+		goto out_free;
+        } else {
+		cluster->count++;
+		nfs_unlock_page(page);
+
+		/* If we wrote past the end of the page,
+		 * call the strategy routine so it can send out a bunch
+		 * of requests.
+		 */
+		 if (req->end >= req->start + PAGE_CACHE_SIZE)
+			nfs_strategy(cluster);
 
-	if (first < rqfirst)
-		rqfirst = first;
-	if (rqlast < last)
-		rqlast = last;
+		release_cluster(cluster);
+        }
 
-	req->wb_offset = rqfirst;
-	req->wb_bytes  = rqlast - rqfirst;
-	req->wb_count++;
+        /* Cluster may have gone at this point */
 
-	return 1;
-}
+done:
+        dprintk("NFS:      nfs_updatepage returns %d (isize %ld)\n",
+                                                status, inode->i_size);
+        return status;
+out_free:
+	nfs_unlock_page(page);
+	goto done;
 
-static inline void
-free_write_request(struct nfs_wreq * req)
-{
-	if (!--req->wb_count)
-		kfree(req);
 }
 
 /*
- * Create and initialize a writeback request
+ * Coalesce adjacent write requests
  */
-static inline struct nfs_wreq *
-create_write_request(struct file * file, struct page *page, unsigned int offset, unsigned int bytes)
+static void
+coalesce(struct nfs_page *req, struct nfs_write_data *data)
 {
-	struct dentry	*dentry = file->f_dentry;
-	struct inode	*inode = dentry->d_inode;
-	struct rpc_clnt	*clnt = NFS_CLIENT(inode);
-	struct nfs_wreq *wreq;
-	struct rpc_task	*task;
-
-	dprintk("NFS:      create_write_request(%s/%s, %ld+%d)\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name,
-		page->offset + offset, bytes);
+	struct nfs_cluster      *cluster = req->cluster;
+	unsigned int            wpages, index, count, total;
+	struct iovec            *iov;
+	__u64                   pos, offset;
+
+	wpages = NFS_SERVER(cluster->file->f_dentry->d_inode)->wpages;
+	if (wpages > NFS_WRITE_MAXIOV)
+		wpages = NFS_WRITE_MAXIOV;
+
+	/* Index of first request in this write call */
+	pos         = req->end;
+	index       = REQUEST_NR(req->start);
+	data->index = index;
+	data->count = 0;
+	offset      = req->start & ~PAGE_CACHE_MASK;
+	total       = 0;
+
+	read_lock(&nfs_wreq_lock);
+	goto first;
+
+	while (index < CLUSTER_PAGES && data->count < wpages) {
+		req = cluster->request[index];
+		if (req == 0 || req->start != pos
+		    || !IS_DIRTY(req) || !nfs_lock_page(req))
+			break;
+	first:
+		count = req->end - req->start;
+		iov = data->args.iov + data->count;
+		iov->iov_base = (u8 *) page_address(req->page) + offset;
+		iov->iov_len = count;
+		total += count;
+		data->count++;
+		index++;
+		req->timeout = jiffies + NFS_WRITEBACK_DELAY;
 
-	/* FIXME: Enforce hard limit on number of concurrent writes? */
-	wreq = (struct nfs_wreq *) kmalloc(sizeof(*wreq), GFP_KERNEL);
-	if (!wreq)
-		goto out_fail;
-	memset(wreq, 0, sizeof(*wreq));
-
-	task = &wreq->wb_task;
-	rpc_init_task(task, clnt, nfs_wback_result, RPC_TASK_NFSWRITE);
-	task->tk_calldata = wreq;
-	task->tk_action = nfs_wback_begin;
-
-	rpcauth_lookupcred(task);	/* Obtain user creds */
-	if (task->tk_status < 0)
-		goto out_req;
-
-	/* Put the task on inode's writeback request list. */
-	wreq->wb_file = file;
-	wreq->wb_pid    = current->pid;
-	wreq->wb_page   = page;
-	wreq->wb_offset = offset;
-	wreq->wb_bytes  = bytes;
-	wreq->wb_count	= 2;		/* One for the IO, one for us */
-
-	append_write_request(&NFS_WRITEBACK(inode), wreq);
-
-	if (nr_write_requests++ > NFS_WRITEBACK_MAX*3/4)
-		rpc_wake_up_next(&write_queue);
-
-	return wreq;
-
-out_req:
-	rpc_release_task(task);
-	kfree(wreq);
-out_fail:
-	return NULL;
-}
-
-/*
- * Schedule a writeback RPC call.
- * If the server is congested, don't add to our backlog of queued
- * requests but call it synchronously.
- * The function returns whether we should wait for the thing or not.
- *
- * FIXME: Here we could walk the inode's lock list to see whether the
- * page we're currently writing to has been write-locked by the caller.
- * If it is, we could schedule an async write request with a long
- * delay in order to avoid writing back the page until the lock is
- * released.
- */
-static inline int
-schedule_write_request(struct nfs_wreq *req, int sync)
-{
-	struct rpc_task	*task = &req->wb_task;
-	struct file	*file = req->wb_file;
-	struct dentry	*dentry = file->f_dentry;
-	struct inode	*inode = dentry->d_inode;
-
-	if (NFS_CONGESTED(inode) || nr_write_requests >= NFS_WRITEBACK_MAX)
-		sync = 1;
-
-	if (sync) {
-		sigset_t	oldmask;
-		struct rpc_clnt *clnt = NFS_CLIENT(inode);
-		dprintk("NFS: %4d schedule_write_request (sync)\n",
-					task->tk_pid);
-		/* Page is already locked */
-		rpc_clnt_sigmask(clnt, &oldmask);
-		rpc_execute(task);
-		rpc_clnt_sigunmask(clnt, &oldmask);
-	} else {
-		dprintk("NFS: %4d schedule_write_request (async)\n",
-					task->tk_pid);
-		task->tk_flags |= RPC_TASK_ASYNC;
-		task->tk_timeout = NFS_WRITEBACK_DELAY;
-		rpc_sleep_on(&write_queue, task, NULL, NULL);
+		pos = req->end;
+		offset = 0;
 	}
+	read_unlock(&nfs_wreq_lock);
 
-	return sync;
+	dprintk("NFS: coalesce gathered %d pages (first %d)\n",
+		data->count, data->index);
+	data->args.nriov = data->count;
+	data->args.count = total;
 }
 
 /*
- * Wait for request to complete.
+ * Create an RPC task for the given write request and kick it.
+ * The page must have been locked by the caller.
+ *
+ * It may happen that the page we're passed is not marked dirty.
+ * This is the case if nfs_updatepage detects a conflicting request
+ * that has been written but not committed.
  */
-static int
-wait_on_write_request(struct nfs_wreq *req)
+int nfs_flush_wback(struct nfs_page *req, int how)
 {
-	struct file		*file = req->wb_file;
-	struct dentry		*dentry = file->f_dentry;
-	struct inode		*inode = dentry->d_inode;
-	struct rpc_clnt		*clnt = NFS_CLIENT(inode);
-	struct wait_queue	wait = { current, NULL };
-	sigset_t		oldmask;
-	int retval;
-
-	/* Make sure it's started.. */
-	if (!WB_INPROGRESS(req))
-		rpc_wake_up_task(&req->wb_task);
-
-	rpc_clnt_sigmask(clnt, &oldmask);
-	add_wait_queue(&req->wb_wait, &wait);
-	for (;;) {
-		current->state = TASK_INTERRUPTIBLE;
-		retval = 0;
-		if (req->wb_flags & NFS_WRITE_COMPLETE)
-			break;
-		retval = -ERESTARTSYS;
-		if (signalled())
-			break;
-		schedule();
+	struct nfs_cluster      *cluster = req->cluster;
+	struct file		*filp = cluster->file;
+	struct dentry           *dentry = filp->f_dentry;
+	struct inode            *inode = dentry->d_inode;
+	struct rpc_clnt 	*clnt = NFS_CLIENT(inode);
+	struct nfs_write_data   *data = &req->wdata;
+	struct rpc_task         *task = &data->task;
+	struct rpc_message	msg;
+	int                     flags = 0;
+	sigset_t		oldset;
+
+	cluster->count++;
+
+	if (!IS_DIRTY(req)) {
+		if (!(how & FLUSH_STABLE)) {
+			printk(KERN_ERR "NFS: nfs_flush_wback called for clean page!\n");
+			how |= FLUSH_STABLE;
+		}
+
+		/* Make sure we mark the page dirty so that our book-
+		 * keeping is consistent */
+		nfs_mark_request_dirty(cluster, req);
 	}
-	remove_wait_queue(&req->wb_wait, &wait);
-	current->state = TASK_RUNNING;
-	rpc_clnt_sigunmask(clnt, &oldmask);
-	return retval;
+
+	/* Set the initial flags for the task.  */
+	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+
+	nfs_write_rpcsetup(dentry, inode, data, req->start, req->end);
+
+	/* Set up the argument struct */
+	data->args.stable = (how & FLUSH_STABLE)? NFS_FILE_SYNC : NFS_UNSTABLE;
+
+	/* Put one or more pages of data into call */
+	coalesce(req, data);
+	cluster->pending += data->count;
+
+	/* Hack for NFSv2 as it never returns the size written */
+	data->res.count = data->args.count;
+
+	/* Finalize the task. */
+	rpc_init_task(task, clnt, nfs_writeback_done, flags);
+	task->tk_calldata = req;
+
+	msg.proc = (NFS_PROTO(inode)->version == 3) ? NFS3PROC_WRITE : NFSPROC_WRITE;
+	msg.arg = &data->args;
+	msg.res = &data->res;
+	msg.cred = nfs_file_cred(filp);
+
+	dprintk("NFS: %4d initiated write call (req %s/%s count %d nriov %d)\n",
+		task->tk_pid, 
+		dentry->d_parent->d_name.name,
+		dentry->d_name.name,
+		data->args.count, data->args.nriov);
+
+	rpc_clnt_sigmask(clnt, &oldset);
+	rpc_call_setup(task, &msg, 0);
+	rpc_execute(task);
+	rpc_clnt_sigunmask(clnt, &oldset);
+	return 0;
 }
 
+
 /*
- * Write a page to the server. This will be used for NFS swapping only
- * (for now), and we currently do this synchronously only.
+ * Fill in the argument/result structures required for the RPC call.
  */
-int
-nfs_writepage(struct file * file, struct page *page)
+static void
+nfs_write_rpcsetup(struct dentry * dentry, struct inode *inode,
+		   struct nfs_write_data *data,
+		   __u64 start, __u64 end)
 {
-	struct dentry *dentry = file->f_dentry;
-	return nfs_writepage_sync(dentry, dentry->d_inode, page, 0, PAGE_SIZE);
+	/* Set up the RPC argument and reply structs */
+	data->args.fh     = NFS_FH(dentry);
+	data->res.fattr   = &data->fattr;
+	data->res.count   = end - start;
+	data->args.offset = start;
+	data->args.count  = end - start;
+	data->res.verf    = &data->verf;
+	data->fattr.valid = 0;
+	data->verf.committed = 0;
 }
 
+
 /*
- * Update and possibly write a cached page of an NFS file.
- *
- * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
- * things with a page scheduled for an RPC call (e.g. invalidate it).
+ * This function is called when the WRITE call is complete.
  */
-int
-nfs_updatepage(struct file *file, struct page *page, unsigned long offset, unsigned int count, int sync)
+static void
+nfs_writeback_done(struct rpc_task *task)
 {
-	struct dentry	*dentry = file->f_dentry;
-	struct inode	*inode = dentry->d_inode;
-	struct nfs_wreq	*req;
-	int		synchronous = sync;
-	int		retval;
+        struct nfs_cluster      *cluster;
+        struct nfs_write_data   *data;
+        struct nfs_writeargs    *argp;
+        struct nfs_writeres     *resp;
+        struct nfs_page         *req, *arg;
+	struct dentry		*dentry;
+        struct inode            *inode;
+        unsigned int            nr, count;
+
+        dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
+                                task->tk_pid, task->tk_status);
+
+        if (!(arg = (struct nfs_page *) task->tk_calldata)) {
+                printk(KERN_ERR "NFS: no write data for RPC task?!\n");
+                return;
+        }
+	data = &arg->wdata;
+
+	cluster = arg->cluster;
+	argp    = &data->args;
+	resp    = &data->res;
+	dentry	= cluster->file->f_dentry;
+	inode   = dentry->d_inode;
+
+        /* We can't handle that yet but we check for it nevertheless */
+        if (resp->count < argp->count && task->tk_status >= 0) {
+                static unsigned long    complain = 0;
+                if (time_before(complain, jiffies)) {
+                        printk(KERN_WARNING
+			       "NFS: Server wrote less than requested.\n");
+                        complain = jiffies + 300 * HZ;
+                }
+                /* Can't do anything about it right now except throw
+                 * an error. */
+                task->tk_status = -EIO;
+        }
+
+
+        if (task->tk_status < 0) {
+                /* A WRITE error occurred. Report the error back to the
+                 * application. */
+		cluster->file->f_error = task->tk_status;
+		for (nr = data->index, count = data->count; count--; nr++) {
+			if ((req = cluster->request[nr]) != NULL)
+				clear_bit(PG_uptodate, &req->page->flags);
+		}
+		goto out_release;
+	}
 
-	dprintk("NFS:      nfs_updatepage(%s/%s %d@%ld, sync=%d)\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name,
-		count, page->offset+offset, sync);
+        /* Update attributes as result of writeback. */
+	nfs_write_attributes(inode, resp->fattr);
 
-	/*
-	 * Try to find a corresponding request on the writeback queue.
-	 * If there is one, we can be sure that this request is not
-	 * yet being processed, because we hold a lock on the page.
-	 *
-	 * If the request was created by us, update it. Otherwise,
-	 * transfer the page lock and flush out the dirty page now.
-	 * After returning, generic_file_write will wait on the
-	 * page and retry the update.
-	 */
-	req = find_write_request(inode, page);
-	if (req && req->wb_file == file && update_write_request(req, offset, count))
-		goto updated;
+	for (nr = data->index, count = data->count; count--; nr++) {
+		if (!(req = cluster->request[nr]))
+			continue;
+		if (req->flags & PG_INVALIDATE_AFTER)
+				clear_bit(PG_uptodate, &req->page->flags);
+	}
 
-	/*
-	 * If wsize is smaller than page size, update and write
-	 * page synchronously.
-	 */
-	if (NFS_SERVER(inode)->wsize < PAGE_SIZE)
-		return nfs_writepage_sync(dentry, inode, page, offset, count);
+        /* If data has been committed, we're all done.
+         * We always take this branch for NFSv2 as the XDR routine always
+         * sets the committed flag.
+         */
+        if (resp->verf->committed != NFS_UNSTABLE)
+                goto out_release;
+
+	if (resp->verf->committed < data->args.stable) {
+		/* We tried a write call, but the server did not
+		 * commit data to stable storage even though we
+		 * requested it.
+		 */
+		static unsigned long    complain = 0;
 
-	/* Create the write request. */
-	req = create_write_request(file, page, offset, count);
-	if (!req)
-		return -ENOBUFS;
+		if (time_before(complain, jiffies)) {
+			printk(KERN_NOTICE
+				"NFS: faulty NFSv3 server %s\n",
+				NFS_SERVER(inode)->hostname);
+			complain = jiffies + 300 * HZ;
+		}
+		goto out_release;
+	}
 
-	/*
-	 * Ok, there's another user of this page with the new request..
-	 * The IO completion will then free the page and the dentry.
+	/* Normal NFSv3 operation. Save the verifier,
+	 * and unlock the page. Don't release the request.
 	 */
-	atomic_inc(&page->count);
-	file->f_count++;
-
-	/* Schedule request */
-	synchronous = schedule_write_request(req, sync);
-
-updated:
-	if (req->wb_bytes == PAGE_SIZE)
-		set_bit(PG_uptodate, &page->flags);
-
-	retval = count;
-	if (synchronous) {
-		int status = wait_on_write_request(req);
-		if (status) {
-			nfs_cancel_request(req);
-			retval = status;
-		} else {
-			status = req->wb_status;
-			if (status < 0)
-				retval = status;
+	for (nr = data->index, count = data->count; count--; nr++) {
+		if (!(req = cluster->request[nr]))
+			continue;
+		/* Avoid copying over the first page's verify info */
+		if (&req->wdata.verf != &data->verf)
+			memcpy(&req->wdata.verf, &data->verf, sizeof(data->verf));
+		/* Update commit sequence number.
+		 * This implicitly marks the page clean
+		 */
+		req->wdata.commit  = cluster->sequence;
+		req->timeout = jiffies + NFS_WRITEBACK_LOCKDELAY;
+	}
+	for (nr = data->index, count = data->count; count--; nr++) {
+		if (!(req = cluster->request[nr]))
+			continue;
+		if (req->flags & PG_UNLOCK_AFTER) {
+			req->flags &= ~PG_UNLOCK_AFTER;
+			nfs_unlock_page(req->page);
 		}
-
-		if (retval < 0)
-			clear_bit(PG_uptodate, &page->flags);
 	}
+        goto out_norelease;
 
-	free_write_request(req);
-	return retval;
-}
+out_release:
+        /* Release requests */
+	task->tk_release = nfs_write_release;
+	return;
 
-/*
- * Cancel a write request. We always mark it cancelled,
- * but if it's already in progress there's no point in
- * calling rpc_exit, and we don't want to overwrite the
- * tk_status field.
- */ 
-static void
-nfs_cancel_request(struct nfs_wreq *req)
-{
-	req->wb_flags |= NFS_WRITE_CANCELLED;
-	if (!WB_INPROGRESS(req)) {
-		rpc_exit(&req->wb_task, 0);
-		rpc_wake_up_task(&req->wb_task);
-	}
+out_norelease:
+	cluster->pending -= data->count;
+	cluster->dirty -= data->count;
+
+	if (!cluster->pending)
+		wake_up(&cluster->wait);
+
+	cluster_schedule_scan(cluster, jiffies + NFS_COMMIT_DELAY);
+	release_cluster(cluster);
 }
 
-/*
- * Cancel all writeback requests, both pending and in progress.
- */
 static void
-nfs_cancel_dirty(struct inode *inode, pid_t pid)
+nfs_write_release(struct rpc_task *task)
 {
-	struct nfs_wreq *head, *req;
+	struct nfs_page *req = (struct nfs_page *) task->tk_calldata;
+	struct nfs_write_data	*data = &req->wdata;
+	struct nfs_cluster	*cluster = req->cluster;
+	int			nr, count;
 
-	req = head = NFS_WRITEBACK(inode);
-	while (req != NULL) {
-		if (pid == 0 || req->wb_pid == pid)
-			nfs_cancel_request(req);
-		if ((req = WB_NEXT(req)) == head)
-			break;
-	}
-}
+	cluster->pending -= data->count;
+	cluster->dirty -= data->count;
 
-/*
- * If we're waiting on somebody else's request
- * we need to increment the counter during the
- * wait so that the request doesn't disappear
- * from under us during the wait..
- */
-static int FASTCALL(wait_on_other_req(struct nfs_wreq *));
-static int wait_on_other_req(struct nfs_wreq *req)
-{
-	int retval;
-	req->wb_count++;
-	retval = wait_on_write_request(req);
-	free_write_request(req);
-	return retval;
-}
 
-/*
- * This writes back a set of requests according to the condition.
- *
- * If this ever gets much more convoluted, use a fn pointer for
- * the condition..
- */
-#define NFS_WB(inode, cond) { int retval = 0 ; \
-	do { \
-		struct nfs_wreq *req = NFS_WRITEBACK(inode); \
-		struct nfs_wreq *head = req; \
-		if (!req) break; \
-		for (;;) { \
-			if (!(req->wb_flags & NFS_WRITE_COMPLETE)) \
-				if (cond) break; \
-			req = WB_NEXT(req); \
-			if (req == head) goto out; \
-		} \
-		retval = wait_on_other_req(req); \
-	} while (!retval); \
-out:	return retval; \
-}
+	write_lock(&nfs_wreq_lock);
+	for (nr = data->index, count = data->count; count--; nr++) {
+		if (!(req = cluster->request[nr]))
+			continue;
+		nfs_release_request(req);
+	}
+	write_unlock(&nfs_wreq_lock);
+	if (!cluster->pending)
+		wake_up(&cluster->wait);
 
-int
-nfs_wb_all(struct inode *inode)
-{
-	NFS_WB(inode, 1);
+	cluster_schedule_scan(cluster, jiffies + NFS_COMMIT_DELAY);
+	release_cluster(cluster);
 }
 
 /*
- * Write back all requests on one page - we do this before reading it.
+ * Commit cluster of dirty pages
  */
 int
-nfs_wb_page(struct inode *inode, struct page *page)
+commit_cluster(struct nfs_cluster *cluster, int how)
 {
-	NFS_WB(inode, req->wb_page == page);
-}
+	struct nfs_page         *req;
+	struct rpc_clnt		*clnt;
+	struct file		*filp = cluster->file;
+	struct dentry		*dentry = filp->f_dentry;
+	struct inode		*inode = dentry->d_inode;
+	struct nfs_write_data   *data = &cluster->wdata;
+	struct rpc_task         *task = &data->task;
+	struct rpc_message	msg;
+	int                     flags, nr, index;
+	int			error;
+	sigset_t		oldset;
+
+	if (NFS_PROTO(inode)->version == 2) {
+		printk(KERN_ERR "NFS: commit_cluster called for NFSv2\n"
+		       "     (dirty %d pages %d pending %d)\n",
+                       cluster->dirty, cluster->pages, cluster->pending);
+		return -EIO;
+	}
 
-/*
- * Write back all pending writes from one file descriptor..
- */
-int
-nfs_wb_file(struct inode *inode, struct file *file)
-{
-	NFS_WB(inode, req->wb_file == file);
-}
+	if (cluster->dirty || !cluster->pages || cluster->committing)
+		return 0;
+	cluster->committing = 1;
 
-void
-nfs_inval(struct inode *inode)
-{
-	nfs_cancel_dirty(inode,0);
-}
+	cluster->count++;
+	read_lock(&nfs_wreq_lock);
+	for (index = 0, nr = 0; index < CLUSTER_PAGES; index++) {
+		if (!(req = cluster->request[index]))
+			continue;
+		/*
+		 * Is the page dirty?
+		 */
+		if (IS_DIRTY(req))
+			continue;
 
-/*
- * The following procedures make up the writeback finite state machinery:
- *
- * 1.	Try to lock the page if not yet locked by us,
- *	set up the RPC call info, and pass to the call FSM.
- */
-static void
-nfs_wback_begin(struct rpc_task *task)
-{
-	struct nfs_wreq	*req = (struct nfs_wreq *) task->tk_calldata;
-	struct page	*page = req->wb_page;
-	struct file	*file = req->wb_file;
-	struct dentry	*dentry = file->f_dentry;
-
-	dprintk("NFS: %4d nfs_wback_begin (%s/%s, status=%d flags=%x)\n",
-		task->tk_pid, dentry->d_parent->d_name.name,
-		dentry->d_name.name, task->tk_status, req->wb_flags);
-
-	task->tk_status = 0;
-
-	/* Setup the task struct for a writeback call */
-	req->wb_flags |= NFS_WRITE_INPROGRESS;
-	req->wb_args.fh     = NFS_FH(dentry);
-	req->wb_args.offset = page->offset + req->wb_offset;
-	req->wb_args.count  = req->wb_bytes;
-	req->wb_args.buffer = (void *) (page_address(page) + req->wb_offset);
+		nr++;
+	}
+	read_unlock(&nfs_wreq_lock);
 
-	rpc_call_setup(task, NFSPROC_WRITE, &req->wb_args, &req->wb_fattr, 0);
+	error = 0;
+	if (!nr)
+		goto out_error;
+
+	/* We give a COMMIT call a higher priority than ordinary RPC
+	 * calls because it is so critical to our memory balance.
+	 * (SWAPPER means it will always be inserted at the head of
+	 * any wait queue.)
+	 */
+	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+	flags |= RPC_TASK_SWAPPER;
 
-	return;
+	/* Create a new RPC task and allocate RPC memory */
+	clnt = NFS_CLIENT(inode);
+
+	nfs_write_rpcsetup(dentry, inode, data, cluster->start, cluster->end);
+
+	/* Remember the commit sequence number.
+	 * When the commit call completes, all requests with sequence
+	 * number > data->commit are ignored because they're either
+	 * dirty, or were marked clean only after the commit call
+	 * was issued.
+	 */
+	data->commit  = cluster->sequence++;
+	data->count = nr;
+
+	rpc_init_task(task, clnt, nfs_commit_done, flags);
+	task->tk_calldata = cluster;
+	msg.proc = NFS3PROC_COMMIT;
+	msg.arg = &data->args;
+	msg.res = &data->res;
+	msg.cred = nfs_file_cred(filp);
+
+	dprintk("NFS: %4d initiated commit call\n", task->tk_pid);
+	rpc_clnt_sigmask(clnt, &oldset);
+	rpc_call_setup(task, &msg, 0);
+	rpc_execute(task);
+	rpc_clnt_sigunmask(clnt, &oldset);
+	return nr;
+ out_error:
+	cluster->committing = 0;
+	wake_up(&cluster->wait);
+	release_cluster(cluster);
+	return error;
 }
 
 /*
- * 2.	Collect the result
+ * COMMIT call returned
  */
 static void
-nfs_wback_result(struct rpc_task *task)
+nfs_commit_done(struct rpc_task *task)
 {
-	struct nfs_wreq *req = (struct nfs_wreq *) task->tk_calldata;
-	struct file	*file = req->wb_file;
-	struct page	*page = req->wb_page;
-	int		status = task->tk_status;
-	struct dentry	*dentry = file->f_dentry;
-	struct inode	*inode = dentry->d_inode;
-
-	dprintk("NFS: %4d nfs_wback_result (%s/%s, status=%d, flags=%x)\n",
-		task->tk_pid, dentry->d_parent->d_name.name,
-		dentry->d_name.name, status, req->wb_flags);
-
-	/* Set the WRITE_COMPLETE flag, but leave WRITE_INPROGRESS set */
-	req->wb_flags |= NFS_WRITE_COMPLETE;
-	req->wb_status = status;
-
-	if (status < 0) {
-		req->wb_flags |= NFS_WRITE_INVALIDATE;
-		file->f_error = status;
-	} else if (!WB_CANCELLED(req)) {
-		struct nfs_fattr *fattr = &req->wb_fattr;
-		/* Update attributes as result of writeback. 
-		 * Beware: when UDP replies arrive out of order, we
-		 * may end up overwriting a previous, bigger file size.
-		 *
-		 * When the file size shrinks we cancel all pending
-		 * writebacks. 
-		 */
-		if (fattr->mtime.seconds >= inode->i_mtime) {
-			if (fattr->size < inode->i_size)
-				fattr->size = inode->i_size;
-
-			/* possible Solaris 2.5 server bug workaround */
-			if (inode->i_ino == fattr->fileid) {
-				/*
-				 * We expect these values to change, and
-				 * don't want to invalidate the caches.
-				 */
-				inode->i_size  = fattr->size;
-				inode->i_mtime = fattr->mtime.seconds;
-				nfs_refresh_inode(inode, fattr);
+        struct nfs_write_data   *data;
+        struct nfs_cluster      *cluster;
+        struct nfs_writeres     *resp;
+        struct nfs_page         *req;
+	struct dentry		*dentry;
+	struct inode		*inode;
+        unsigned int            nr;
+        int                     stable = 0;
+
+        dprintk("NFS: %4d nfs_commit_done (status %d)\n",
+                                task->tk_pid, task->tk_status);
+
+        cluster = (struct nfs_cluster *) task->tk_calldata;
+        data = &cluster->wdata;
+        resp    = &data->res;
+
+	dentry = cluster->file->f_dentry;
+	inode = dentry->d_inode;
+
+        if (task->tk_status < 0) {
+                /* A COMMIT error occurred. Just reschedule all pages
+                 * for sync writing. */
+		read_lock(&nfs_wreq_lock);
+                for (nr = 0; nr < CLUSTER_PAGES; nr++) {
+                        if (!(req = cluster->request[nr]) || req->wdata.commit > data->commit)
+                                continue;	/* empty slot, or not covered by this COMMIT */
+                        nfs_mark_request_dirty(cluster, req);
+                        req->timeout = jiffies + 1 * HZ;
+                        cluster_schedule_scan(cluster, req->timeout);
+                }
+		read_unlock(&nfs_wreq_lock);
+                stable |= FLUSH_STABLE;
+        } else {
+                /* Okay, COMMIT succeeded, apparently. Check the verifier
+                 * returned by the server against all stored verfs. */
+
+                dprintk("NFS: %4d scan committed (seq=%d)\n",
+                                task->tk_pid, cluster->sequence);
+		nfs_refresh_inode(inode, resp->fattr);
+		write_lock(&nfs_wreq_lock);
+		for (nr = 0; nr < CLUSTER_PAGES; nr++) {
+			if (!(req = cluster->request[nr]))
+				continue;
+
+			dprintk("NFS:      inspect req (seq %d)\n",
+				req->wdata.commit);
+
+			/* Ignore dirty pages and pages written to
+			 * the server after this commit call.
+			 * (note that req->wdata.commit is 0xFFFF if page dirty)
+			 */
+			if (req->wdata.commit > data->commit)
+				continue;
+
+			if (!memcmp(req->wdata.verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
+				/* We have a match */
+				nfs_release_request(req);
+			} else {
+				/* We have a mismatch. Write the page again,
+				 * this time to stable storage right away */
+				dprintk("NFS:      verf mismatch...\n");
+				nfs_mark_request_dirty(cluster, req);
+				req->timeout = jiffies + 1 * HZ;
+				stable |= FLUSH_STABLE;
+				cluster_schedule_scan(cluster, req->timeout);
 			}
-			else
-				printk("nfs_wback_result: inode %ld, got %u?\n",
-					inode->i_ino, fattr->fileid);
 		}
+		write_unlock(&nfs_wreq_lock);
 	}
+	cluster_schedule_scan(cluster, jiffies + NFS_COMMIT_DELAY);
 
-	rpc_release_task(task);
-
-	if (WB_INVALIDATE(req))
-		clear_bit(PG_uptodate, &page->flags);
-
-	__free_page(page);
-	remove_write_request(&NFS_WRITEBACK(inode), req);
-	nr_write_requests--;
-	fput(req->wb_file);
+	task->tk_release = nfs_commit_release;
+}
 
-	wake_up(&req->wb_wait);
-	free_write_request(req);
+static void
+nfs_commit_release(struct rpc_task *task)
+{
+	struct nfs_cluster *cluster = (struct nfs_cluster *) task->tk_calldata;
+	cluster->committing = 0;
+	if (cluster->pages)
+		cluster_schedule_scan(cluster, jiffies + NFS_COMMIT_DELAY);
+	wake_up(&cluster->wait);
+	release_cluster(cluster);
 }
--- linux/fs/ext2/inode.c.nfsattack-gafton	Tue Jan  4 13:12:23 2000
+++ linux/fs/ext2/inode.c	Fri Feb  4 23:26:34 2000
@@ -519,6 +519,16 @@
 	inode->i_ctime = le32_to_cpu(raw_inode->i_ctime);
 	inode->i_mtime = le32_to_cpu(raw_inode->i_mtime);
 	inode->u.ext2_i.i_dtime = le32_to_cpu(raw_inode->i_dtime);
+	/* We now have enough fields to check if the inode was active or not.
+	 * This is needed because nfsd might try to access dead inodes.
+	 * The test is the same one that e2fsck uses.
+	 * NeilBrown 1999oct15
+	 */
+	if (inode->i_nlink == 0 && (inode->i_mode == 0 || inode->u.ext2_i.i_dtime)) {
+		/* this inode is deleted */
+		brelse (bh);
+		goto bad_inode;
+	}
 	inode->i_blksize = PAGE_SIZE;	/* This is the optimal IO size (for stat), not the fs block size */
 	inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
 	inode->i_version = ++global_event;
--- linux/fs/smbfs/file.c.nfsattack-gafton	Wed Feb 17 18:59:32 1999
+++ linux/fs/smbfs/file.c	Fri Feb  4 23:26:34 2000
@@ -188,15 +188,30 @@
 }
 
 static int
-smb_updatepage(struct file *file, struct page *page, unsigned long offset, unsigned int count, int sync)
+smb_updatepage(struct file *file, struct page *page, const char *buf, unsigned long offset, unsigned int count, int sync)
 {
 	struct dentry *dentry = file->f_dentry;
+	void *dest;
+	int result = -EFAULT;
+
+	set_bit(PG_locked, &page->flags);
 
 	pr_debug("SMBFS: smb_updatepage(%s/%s %d@%ld, sync=%d)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name,
 	 	count, page->offset+offset, sync);
 
-	return smb_writepage_sync(dentry, page, offset, count);
+	dest = (u8*)page_address(page) + offset;
+	if (dest != buf)
+		count -= copy_from_user(dest, buf, count);
+	if (!count) {
+		clear_bit(PG_uptodate, &page->flags);
+		goto out;
+	}
+
+	result = smb_writepage_sync(dentry, page, offset, count);
+ out:
+	smb_unlock_page(page);
+	return result;
 }
 
 static ssize_t
--- linux/fs/lockd/Makefile.nfsattack-gafton	Mon Apr  7 14:35:30 1997
+++ linux/fs/lockd/Makefile	Fri Feb  4 23:26:34 2000
@@ -10,6 +10,11 @@
 O_TARGET := lockd.o
 O_OBJS   := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \
 	    svcproc.o svcsubs.o mon.o xdr.o
+
+ifdef CONFIG_NFS_V3
+  O_OBJS += xdr4.o
+endif
+
 OX_OBJS  := lockd_syms.o
 M_OBJS   := $(O_TARGET)
 
--- linux/fs/lockd/clntlock.c.nfsattack-gafton	Mon Aug  9 15:05:02 1999
+++ linux/fs/lockd/clntlock.c	Fri Feb  4 23:26:34 2000
@@ -152,7 +152,7 @@
 		host->h_monitored = 0;
 		host->h_nsmstate = newstate;
 		host->h_state++;
-		host->h_count++;
+		nlm_get_host(host);
 		kernel_thread(reclaimer, host, 0);
 	}
 }
--- linux/fs/lockd/clntproc.c.nfsattack-gafton	Wed Feb 17 12:44:33 1999
+++ linux/fs/lockd/clntproc.c	Fri Feb  4 23:26:34 2000
@@ -46,11 +46,13 @@
 {
 	struct nlm_args	*argp = &req->a_args;
 	struct nlm_lock	*lock = &argp->lock;
+	struct dentry   *dentry = fl->fl_file->f_dentry;
+	struct nfs_fh   *fh = NFS_FH(dentry);
 
 	memset(argp, 0, sizeof(*argp));
 	nlmclnt_next_cookie(&argp->cookie);
 	argp->state   = nsm_local_state;
-	lock->fh      = *NFS_FH(fl->fl_file->f_dentry);
+	memcpy(&lock->fh, fh, sizeof(*fh));
 	lock->caller  = system_utsname.nodename;
 	lock->oh.data = req->a_owner;
 	lock->oh.len  = sprintf(req->a_owner, "%d@%s",
@@ -100,15 +102,19 @@
 int
 nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 {
-	struct nfs_server	*nfssrv = NFS_SERVER(inode);
 	struct nlm_host		*host;
 	struct nlm_rqst		reqst, *call = &reqst;
 	sigset_t		oldset;
 	unsigned long		flags;
-	int			status;
+	int			status, proto = IPPROTO_UDP, vers;
 
 	/* Always use NLM version 1 over UDP for now... */
-	if (!(host = nlmclnt_lookup_host(NFS_ADDR(inode), IPPROTO_UDP, 1)))
+	vers = (NFS_PROTO(inode)->version == 3) ? 4 : 1;
+	if (NFS_PROTO(inode)->version > 3) {
+		printk(KERN_NOTICE "NFSv4 file locking not implemented!\n");
+		return -ENOLCK;
+	}
+	if (!(host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers)))
 		return -ENOLCK;
 
 	/* Create RPC client handle if not there, and copy soft
@@ -122,9 +128,9 @@
 			status = -ENOLCK;
 			goto done;
 		}
-		clnt->cl_softrtry = nfssrv->client->cl_softrtry;
-		clnt->cl_intr     = nfssrv->client->cl_intr;
-		clnt->cl_chatty   = nfssrv->client->cl_chatty;
+		clnt->cl_softrtry = NFS_CLIENT(inode)->cl_softrtry;
+		clnt->cl_intr     = NFS_CLIENT(inode)->cl_intr;
+		clnt->cl_chatty   = NFS_CLIENT(inode)->cl_chatty;
 	}
 
 	/* Keep the old signal mask */
@@ -141,6 +147,10 @@
 		spin_unlock_irqrestore(&current->sigmask_lock, flags);
 
 		call = nlmclnt_alloc_call();
+		if (!call) {
+			status = -ENOMEM;
+			goto out_restore;
+		}
 		call->a_flags = RPC_TASK_ASYNC;
 	} else {
 		spin_unlock_irqrestore(&current->sigmask_lock, flags);
@@ -166,6 +176,7 @@
 	if (status < 0 && (call->a_flags & RPC_TASK_ASYNC))
 		rpc_free(call);
 
+out_restore:
 	spin_lock_irqsave(&current->sigmask_lock, flags);
 	current->blocked = oldset;
 	recalc_sigpending(current);
@@ -220,11 +231,21 @@
 	struct rpc_clnt	*clnt;
 	struct nlm_args	*argp = &req->a_args;
 	struct nlm_res	*resp = &req->a_res;
+	struct file	*filp = argp->lock.fl.fl_file;
+	struct rpc_message msg;
 	int		status;
 
 	dprintk("lockd: call procedure %s on %s\n",
 			nlm_procname(proc), host->h_name);
 
+	msg.proc = proc;
+	msg.arg = argp;
+	msg.res = resp;
+	if (filp)
+		msg.cred = nfs_file_cred(filp);
+	else
+		msg.cred = NULL;
+
 	do {
 		if (host->h_reclaiming && !argp->reclaim) {
 			interruptible_sleep_on(&host->h_gracewait);
@@ -236,7 +257,7 @@
 			return -ENOLCK;
 
 		/* Perform the RPC call. If an error occurs, try again */
-		if ((status = rpc_call(clnt, proc, argp, resp, 0)) < 0) {
+		if ((status = rpc_call_sync(clnt, &msg, 0)) < 0) {
 			dprintk("lockd: rpc_call returned error %d\n", -status);
 			switch (status) {
 			case -EPROTONOSUPPORT:
@@ -288,13 +309,15 @@
 /*
  * Generic NLM call, async version.
  */
-int
-nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback)
+static int
+_nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback,
+		    struct rpc_cred *cred)
 {
 	struct nlm_host	*host = req->a_host;
 	struct rpc_clnt	*clnt;
 	struct nlm_args	*argp = &req->a_args;
 	struct nlm_res	*resp = &req->a_res;
+	struct rpc_message msg;
 	int		status;
 
 	dprintk("lockd: call procedure %s on %s (async)\n",
@@ -304,16 +327,42 @@
 	if ((clnt = nlm_bind_host(host)) == NULL)
 		return -ENOLCK;
 
+	/* Increment host refcount */
+        nlm_get_host(host);
+
         /* bootstrap and kick off the async RPC call */
-        status = rpc_do_call(clnt, proc, argp, resp, RPC_TASK_ASYNC,
-					callback, req);
+	msg.proc = proc;
+	msg.arg = argp;
+	msg.res =resp;
+	msg.cred = cred;
+        status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, callback, req);
 
-	/* If the async call is proceeding, increment host refcount */
-        if (status >= 0 && (req->a_flags & RPC_TASK_ASYNC))
-                host->h_count++;
+	if (status < 0)
+		nlm_release_host(host);
 	return status;
 }
 
+
+int
+nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback)
+{
+	struct nlm_args	*argp = &req->a_args;
+	struct file	*filp = argp->lock.fl.fl_file;
+	struct rpc_cred *cred = NULL;
+
+	if (filp)
+		cred = nfs_file_cred(filp);
+
+	return _nlmclnt_async_call(req, proc, callback, cred);
+}
+
+int
+nlmsvc_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback)
+{
+	return _nlmclnt_async_call(req, proc, callback, NULL);
+}
+
+
 /*
  * TEST for the presence of a conflicting lock
  */
@@ -328,7 +377,7 @@
 	status = req->a_res.status;
 	if (status == NLM_LCK_GRANTED) {
 		fl->fl_type = F_UNLCK;
-	} if (status == NLM_LCK_DENIED) {
+	} else if (status == NLM_LCK_DENIED) {
 		/*
 		 * Report the conflicting lock back to the application.
 		 * FIXME: Is it OK to report the pid back as well?
@@ -342,6 +391,21 @@
 	return 0;
 }
 
+static
+void nlmclnt_insert_lock_callback(struct file_lock *fl)
+{
+	nlm_get_host(fl->fl_u.nfs_fl.host);
+}
+static
+void nlmclnt_remove_lock_callback(struct file_lock *fl)
+{
+	if (fl->fl_u.nfs_fl.host) {
+		nlm_release_host(fl->fl_u.nfs_fl.host);
+		fl->fl_u.nfs_fl.host = NULL;
+	}
+}
+
+
 /*
  * LOCK: Try to create a lock
  *
@@ -388,6 +452,9 @@
 	if (resp->status == NLM_LCK_GRANTED) {
 		fl->fl_u.nfs_fl.state = host->h_state;
 		fl->fl_u.nfs_fl.flags |= NFS_LCK_GRANTED;
+		fl->fl_u.nfs_fl.host = host;
+		fl->fl_insert = nlmclnt_insert_lock_callback;
+		fl->fl_remove = nlmclnt_remove_lock_callback;
 	}
 
 	return nlm_stat_to_errno(resp->status);
@@ -439,15 +506,9 @@
 static int
 nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 {
-	struct nlm_host	*host = req->a_host;
 	struct nlm_res	*resp = &req->a_res;
 	int		status;
 
-	/* No monitor, no lock: see nlmclnt_lock().
-	 * Since this is an UNLOCK, don't try to setup monitoring here. */
-	if (!host->h_monitored)
-		return -ENOLCK;
-
 	/* Clean the GRANTED flag now so the lock doesn't get
 	 * reclaimed while we're stuck in the unlock call. */
 	fl->fl_u.nfs_fl.flags &= ~NFS_LCK_GRANTED;
@@ -482,17 +543,20 @@
 
 	if (task->tk_status < 0) {
 		dprintk("lockd: unlock failed (err = %d)\n", -task->tk_status);
-		nlm_rebind_host(req->a_host);
-		rpc_restart_call(task);
-		return;
+		goto retry_unlock;
 	}
 	if (status != NLM_LCK_GRANTED
 	 && status != NLM_LCK_DENIED_GRACE_PERIOD) {
 		printk("lockd: unexpected unlock status: %d\n", status);
 	}
-
-die:
-	rpc_release_task(task);
+ die:
+	nlm_release_host(req->a_host);
+	rpc_free(req);
+	return;
+ retry_unlock:
+	nlm_rebind_host(req->a_host);
+	rpc_restart_call(task);
+	return;
 }
 
 /*
@@ -515,10 +579,9 @@
 	recalc_sigpending(current);
 	spin_unlock_irqrestore(&current->sigmask_lock, flags);
 
-	do {
-		req = (struct nlm_rqst *) rpc_allocate(RPC_TASK_ASYNC,
-							sizeof(*req));
-	} while (req == NULL);
+	req = nlmclnt_alloc_call();
+	if (!req)
+		return -ENOMEM;
 	req->a_host  = host;
 	req->a_flags = RPC_TASK_ASYNC;
 
@@ -568,9 +631,8 @@
 	}
 
 die:
-	rpc_release_task(task);
 	nlm_release_host(req->a_host);
-	kfree(req);
+	rpc_free(req);
 	return;
 
 retry_cancel:
--- linux/fs/lockd/host.c.nfsattack-gafton	Mon Mar  1 13:34:56 1999
+++ linux/fs/lockd/host.c	Fri Feb  4 23:26:34 2000
@@ -15,6 +15,7 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
+#include <linux/lockd/sm_inter.h>
 
 
 #define NLMDBG_FACILITY		NLMDBG_HOSTCACHE
@@ -105,8 +106,7 @@
 				host->h_next = nlm_hosts[hash];
 				nlm_hosts[hash] = host;
 			}
-			host->h_expires = jiffies + NLM_HOST_EXPIRE;
-			host->h_count++;
+			nlm_get_host(host);
 			up(&nlm_host_sema);
 			return host;
 		}
@@ -229,13 +229,27 @@
 }
 
 /*
+ * Increment NLM host count
+ */
+struct nlm_host * nlm_get_host(struct nlm_host *host)
+{
+	if (host) {
+		dprintk("lockd: get host %s\n", host->h_name);
+		host->h_count ++;
+		host->h_expires = jiffies + NLM_HOST_EXPIRE;
+	}
+	return host;
+}
+
+/*
  * Release NLM host after use
  */
-void
-nlm_release_host(struct nlm_host *host)
+void nlm_release_host(struct nlm_host *host)
 {
-	dprintk("lockd: release host %s\n", host->h_name);
-	host->h_count -= 1;
+	if (host && host->h_count) {
+		dprintk("lockd: release host %s\n", host->h_name);
+		host->h_count --;
+	}
 }
 
 /*
@@ -307,6 +321,8 @@
 			}
 			dprintk("lockd: delete host %s\n", host->h_name);
 			*q = host->h_next;
+			if (host->h_monitored)
+				nsm_unmonitor(host);
 			if ((clnt = host->h_rpcclnt) != NULL) {
 				if (clnt->cl_users) {
 					printk(KERN_WARNING
--- linux/fs/lockd/mon.c.nfsattack-gafton	Tue Jan  4 13:12:23 2000
+++ linux/fs/lockd/mon.c	Fri Feb  4 23:26:34 2000
@@ -4,6 +4,10 @@
  * The kernel statd client.
  *
  * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
+ *
+ * Note: In a future release, we should fold all NSM activity into
+ * rpc.mountd and the mount program, respectively. Stuff like this
+ * really doesn't belong in the kernel.	--okir
  */
 
 #include <linux/types.h>
@@ -30,14 +34,12 @@
  * Common procedure for SM_MON/SM_UNMON calls
  */
 static int
-nsm_mon_unmon(struct nlm_host *host, char *what, u32 proc)
+nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nlm_res *res)
 {
 	struct rpc_clnt	*clnt;
 	int		status;
 	struct nsm_args	args;
-	struct nsm_res	res;
 
-	dprintk("lockd: nsm_%s(%s)\n", what, host->h_name);
 	status = -EACCES;
 	clnt = nsm_create();
 	if (!clnt)
@@ -47,23 +49,15 @@
 	args.prog = NLM_PROGRAM;
 	args.vers = 1;
 	args.proc = NLMPROC_NSM_NOTIFY;
+	memset(res, 0, sizeof(*res));
 
-	status = rpc_call(clnt, proc, &args, &res, 0);
-	if (status < 0) {
+	status = rpc_call(clnt, proc, &args, res, 0);
+	if (status < 0)
 		printk(KERN_DEBUG "nsm_mon_unmon: rpc failed, status=%d\n",
 			status);
-		goto out;
-	}
-
-	status = -EACCES;
-	if (res.status != 0) {
-		printk(KERN_NOTICE "lockd: cannot %s %s\n", what, host->h_name);
-		goto out;
-	}
-
-	nsm_local_state = res.state;
-	status = 0;
-out:
+	else
+		status = 0;
+ out:
 	return status;
 }
 
@@ -73,10 +67,16 @@
 int
 nsm_monitor(struct nlm_host *host)
 {
+	struct nlm_res	res;
 	int		status;
 
-	status = nsm_mon_unmon(host, "monitor", SM_MON);
-	if (status >= 0)
+	dprintk("lockd: nsm_monitor(%s)\n", host->h_name);
+
+	status = nsm_mon_unmon(host, SM_MON, &res);
+
+	if (status < 0 || res.status != 0)
+		printk(KERN_NOTICE "lockd: cannot monitor %s\n", host->h_name);
+	else
 		host->h_monitored = 1;
 	return status;
 }
@@ -87,9 +87,15 @@
 int
 nsm_unmonitor(struct nlm_host *host)
 {
+	struct nlm_res	res;
 	int		status;
 
-	if ((status = nsm_mon_unmon(host, "unmonitor", SM_UNMON)) >= 0)
+	dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name);
+
+	status = nsm_mon_unmon(host, SM_UNMON, &res);
+	if (status < 0)
+		printk(KERN_NOTICE "lockd: cannot unmonitor %s\n", host->h_name);
+	else
 		host->h_monitored = 0;
 	return status;
 }
@@ -155,8 +161,8 @@
 	 */
 	sprintf(buffer, "%d.%d.%d.%d", (addr>>24) & 0xff, (addr>>16) & 0xff,
 				 	(addr>>8) & 0xff,  (addr) & 0xff);
-	if (!(p = xdr_encode_string(p, buffer))
-	 || !(p = xdr_encode_string(p, system_utsname.nodename)))
+	if (!(p = xdr_encode_string(p, buffer, -1))
+	 || !(p = xdr_encode_string(p, system_utsname.nodename, -1)))
 		return -EIO;
 	*p++ = htonl(argp->prog);
 	*p++ = htonl(argp->vers);
@@ -187,7 +193,7 @@
 static int
 xdr_decode_stat(struct rpc_rqst *rqstp, u32 *p, struct nsm_res *resp)
 {
-	resp->status = ntohl(*p++);
+	resp->state = ntohl(*p++);
 	return 0;
 }
 
--- linux/fs/lockd/svc.c.nfsattack-gafton	Tue Jan  4 13:12:23 2000
+++ linux/fs/lockd/svc.c	Fri Feb  4 23:26:34 2000
@@ -337,6 +337,7 @@
 {
 	/* FIXME: delete all NLM clients */
 	nlm_shutdown_hosts();
+	nlmxdr_shutdown();
 	do_lockdctl = NULL;
 }
 #endif
--- linux/fs/lockd/svclock.c.nfsattack-gafton	Mon Aug  9 15:05:02 1999
+++ linux/fs/lockd/svclock.c	Fri Feb  4 23:26:34 2000
@@ -520,7 +520,7 @@
 	nlmsvc_insert_block(block, jiffies + 30 * HZ);
 
 	/* Call the client */
-	nlmclnt_async_call(&block->b_call, NLMPROC_GRANTED_MSG,
+	nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG,
 						nlmsvc_grant_callback);
 	up(&file->f_sema);
 }
@@ -564,7 +564,6 @@
 	block->b_incall = 0;
 
 	nlm_release_host(call->a_host);
-	rpc_release_task(task);
 }
 
 /*
--- linux/fs/lockd/svcproc.c.nfsattack-gafton	Tue Oct 26 20:53:42 1999
+++ linux/fs/lockd/svcproc.c	Fri Feb  4 23:26:34 2000
@@ -468,7 +468,7 @@
 	host = nlmclnt_lookup_host(&rqstp->rq_addr,
 				rqstp->rq_prot, rqstp->rq_vers);
 	if (!host) {
-		kfree(call);
+		rpc_free(call);
 		return rpc_system_err;
 	}
 
@@ -476,7 +476,7 @@
 	call->a_host  = host;
 	memcpy(&call->a_args, resp, sizeof(*resp));
 
-	if (nlmclnt_async_call(call, proc, nlmsvc_callback_exit) < 0)
+	if (nlmsvc_async_call(call, proc, nlmsvc_callback_exit) < 0)
 		return rpc_system_err;
 
 	return rpc_success;
@@ -492,8 +492,7 @@
 					task->tk_pid, -task->tk_status);
 	}
 	nlm_release_host(call->a_host);
-	rpc_release_task(task);
-	kfree(call);
+	rpc_free(call);
 }
 
 /*
--- linux/fs/lockd/svcsubs.c.nfsattack-gafton	Tue Oct 26 20:53:42 1999
+++ linux/fs/lockd/svcsubs.c	Fri Feb  4 23:26:34 2000
@@ -48,7 +48,7 @@
 nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
 					struct nfs_fh *f)
 {
-	struct knfs_fh	*fh = (struct knfs_fh *) f;
+	struct knfs_fh	*fh = (struct knfs_fh *) f->data;
 	struct nlm_file	*file;
 	unsigned int	hash;
 	u32		nfserr;
--- linux/fs/lockd/xdr.c.nfsattack-gafton	Sat Feb  6 15:46:21 1999
+++ linux/fs/lockd/xdr.c	Fri Feb  4 23:26:34 2000
@@ -20,6 +20,7 @@
 
 #define NLMDBG_FACILITY		NLMDBG_XDR
 #define NLM_MAXSTRLEN		1024
+#define OFFSET_MAX		LONG_MAX
 
 #define QUADLEN(len)		(((len) + 3) >> 2)
 
@@ -29,6 +30,8 @@
 
 
 typedef struct nlm_args	nlm_args;
+static void nlm_register_stats(void);
+static void nlm_unregister_stats(void);
 
 /*
  * Initialization of NFS status variables
@@ -48,6 +51,14 @@
 	nlm_lck_denied_grace_period = htonl(NLM_LCK_DENIED_GRACE_PERIOD);
 
 	inited = 1;
+
+	nlm_register_stats();
+}
+
+void
+nlmxdr_shutdown(void)
+{
+	nlm_unregister_stats();
 }
 
 /*
@@ -93,22 +104,23 @@
 {
 	unsigned int	len;
 
-	if ((len = ntohl(*p++)) != sizeof(*f)) {
+	if ((len = ntohl(*p++)) != NFS2_FHSIZE) {
 		printk(KERN_NOTICE
 			"lockd: bad fhandle size %x (should be %d)\n",
-			len, sizeof(*f));
+			len, NFS2_FHSIZE);
 		return NULL;
 	}
-	memcpy(f, p, sizeof(*f));
-	return p + XDR_QUADLEN(sizeof(*f));
+	f->size = NFS2_FHSIZE;
+	memcpy(f->data, p, NFS2_FHSIZE);
+	return p + XDR_QUADLEN(NFS2_FHSIZE);
 }
 
 static inline u32 *
 nlm_encode_fh(u32 *p, struct nfs_fh *f)
 {
-	*p++ = htonl(sizeof(*f));
-	memcpy(p, f, sizeof(*f));
-	return p + XDR_QUADLEN(sizeof(*f));
+	*p++ = htonl(NFS2_FHSIZE);
+	memcpy(p, f->data, NFS2_FHSIZE);
+	return p + XDR_QUADLEN(NFS2_FHSIZE);
 }
 
 /*
@@ -145,7 +157,7 @@
 	fl->fl_start = ntohl(*p++);
 	len = ntohl(*p++);
 	if (len == 0 || (fl->fl_end = fl->fl_start + len - 1) < 0)
-		fl->fl_end = NLM_OFFSET_MAX;
+		fl->fl_end = OFFSET_MAX;
 	return p;
 }
 
@@ -157,17 +169,21 @@
 {
 	struct file_lock	*fl = &lock->fl;
 
-	if (!(p = xdr_encode_string(p, lock->caller))
+	if (!(p = xdr_encode_string(p, lock->caller, -1))
 	 || !(p = nlm_encode_fh(p, &lock->fh))
 	 || !(p = nlm_encode_oh(p, &lock->oh)))
 		return NULL;
 
+	if (fl->fl_start > NLM_OFFSET_MAX
+	 || (fl->fl_end > NLM_OFFSET_MAX && fl->fl_end != OFFSET_MAX))
+		return NULL;
+
 	*p++ = htonl(fl->fl_pid);
-	*p++ = htonl(lock->fl.fl_start);
-	if (lock->fl.fl_end == NLM_OFFSET_MAX)
+	*p++ = htonl(fl->fl_start);
+	if (fl->fl_end == OFFSET_MAX)
 		*p++ = xdr_zero;
 	else
-		*p++ = htonl(lock->fl.fl_end - lock->fl.fl_start + 1);
+		*p++ = htonl(fl->fl_end - fl->fl_start + 1);
 
 	return p;
 }
@@ -193,7 +209,7 @@
 			return 0;
 
 		*p++ = htonl(fl->fl_start);
-		if (fl->fl_end == NLM_OFFSET_MAX)
+		if (fl->fl_end == OFFSET_MAX)
 			*p++ = xdr_zero;
 		else
 			*p++ = htonl(fl->fl_end - fl->fl_start + 1);
@@ -428,7 +444,7 @@
 		fl->fl_start = ntohl(*p++);
 		len = ntohl(*p++);
 		if (len == 0 || (fl->fl_end = fl->fl_start + len - 1) < 0)
-			fl->fl_end = NLM_OFFSET_MAX;
+			fl->fl_end = OFFSET_MAX;
 	}
 	return 0;
 }
@@ -512,10 +528,10 @@
  */
 #define NLM_void_sz		0
 #define NLM_cookie_sz		3	/* 1 len , 2 data */
-#define NLM_caller_sz		1+QUADLEN(sizeof(system_utsname.nodename))
+#define NLM_caller_sz		1+QUADLEN(NLM_MAXSTRLEN)
 #define NLM_netobj_sz		1+QUADLEN(XDR_MAX_NETOBJ)
 /* #define NLM_owner_sz		1+QUADLEN(NLM_MAXOWNER) */
-#define NLM_fhandle_sz		1+QUADLEN(NFS_FHSIZE)
+#define NLM_fhandle_sz		1+QUADLEN(NFS2_FHSIZE)
 #define NLM_lock_sz		3+NLM_caller_sz+NLM_netobj_sz+NLM_fhandle_sz
 #define NLM_holder_sz		4+NLM_netobj_sz
 
@@ -541,7 +557,8 @@
     { "nlm_" #proc,						\
       (kxdrproc_t) nlmclt_encode_##argtype,			\
       (kxdrproc_t) nlmclt_decode_##restype,			\
-      MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2		\
+      MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2,		\
+      0								\
     }
 
 static struct rpc_procinfo	nlm_procedures[] = {
@@ -586,14 +603,21 @@
 	3, 24, nlm_procedures,
 };
 
+#ifdef CONFIG_NFS_V3
+extern struct rpc_version nlm_version4;
+#endif
+
 static struct rpc_version *	nlm_versions[] = {
 	NULL,
 	&nlm_version1,
 	NULL,
 	&nlm_version3,
+#ifdef CONFIG_NFS_V3
+	&nlm_version4,
+#endif
 };
 
-static struct rpc_stat		nlm_stats;
+static struct rpc_stat		nlm_stats = { &nlm_program };
 
 struct rpc_program		nlm_program = {
 	"lockd",
@@ -613,3 +637,13 @@
 }
 #endif
 
+static void nlm_register_stats(void) {
+#ifdef CONFIG_PROC_FS
+	rpc_proc_register(&nlm_stats);
+#endif
+}
+static void nlm_unregister_stats(void) {
+#ifdef CONFIG_PROC_FS
+	rpc_proc_unregister("lockd");
+#endif
+}
--- linux/fs/lockd/xdr4.c.nfsattack-gafton	Fri Feb  4 23:26:34 2000
+++ linux/fs/lockd/xdr4.c	Fri Feb  4 23:26:34 2000
@@ -0,0 +1,582 @@
+/*
+ * linux/fs/lockd/xdr4.c
+ *
+ * XDR support for lockd and the lock client.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ * Copyright (C) 1999, Trond Myklebust <trond.myklebust@fys.uio.no>
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/utsname.h>
+#include <linux/nfs.h>
+
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/lockd/lockd.h>
+#include <linux/lockd/sm_inter.h>
+
+#define NLMDBG_FACILITY		NLMDBG_XDR
+#define NLM_MAXSTRLEN		1024
+#define OFFSET_MAX		((off_t)LONG_MAX)
+
+#define QUADLEN(len)		(((len) + 3) >> 2)
+
+
+typedef struct nlm_args	nlm_args;
+
+/* Saturate a 64-bit offset to +/-LONG_MAX; clamp before narrowing to off_t. */
+static inline off_t size_to_off_t(__s64 size)
+{
+	size = (size > (__s64)LONG_MAX) ? (__s64)LONG_MAX : size;
+	return (size < (__s64)-LONG_MAX) ? (off_t)-LONG_MAX : (off_t) size;
+}
+
+/*
+ * XDR functions for basic NLM types
+ */
+static u32 *
+nlm4_decode_cookie(u32 *p, struct nlm_cookie *c)
+{
+	unsigned int	len;
+
+	/* Decode the cookie at p into c; NULL if it is longer than 8 bytes. */
+	len = ntohl(*p++);
+	if (len == 0) {
+		/* Zero-length cookie: store four zero bytes in its place. */
+		c->len = 4;
+		memset(c->data, 0, 4);	/* hockeypux brain damage */
+	} else if (len <= 8) {
+		c->len = len;
+		memcpy(c->data, p, len);
+		p += (len + 3) >> 2;	/* advance over the data, rounded up to words */
+	} else {
+		/* len is unsigned, so print it with %u rather than %d. */
+		printk(KERN_NOTICE
+			"lockd: bad cookie size %u "
+			"(only cookies of up to 8 bytes are supported.)\n",
+			len);
+		return NULL;
+	}
+
+	return p;
+}
+
+static u32 *
+nlm4_encode_cookie(u32 *p, struct nlm_cookie *c)
+{
+	*p++ = htonl(c->len);
+	memcpy(p, c->data, c->len);
+	p+=(c->len+3)>>2;
+	return p;
+}
+
+static u32 *
+nlm4_decode_fh(u32 *p, struct nfs_fh *f)
+{
+	/* The handle length is read off the wire; reject over NFS_MAXFHSIZE. */
+	memset(f->data, 0, sizeof(f->data));
+	f->size = ntohl(*p++);
+	if (f->size > NFS_MAXFHSIZE) {
+		printk(KERN_NOTICE "lockd: bad fhandle size 0x%x (max %d)\n",
+			f->size, NFS_MAXFHSIZE);
+		return NULL;
+	}
+	memcpy(f->data, p, f->size);
+	return p + XDR_QUADLEN(f->size);
+}
+
+static u32 *
+nlm4_encode_fh(u32 *p, struct nfs_fh *f)
+{
+	*p++ = htonl(f->size);
+	memcpy(p, f->data, f->size);
+	return p + XDR_QUADLEN(f->size);
+}
+
+/*
+ * Encode and decode owner handle
+ */
+static u32 *
+nlm4_decode_oh(u32 *p, struct xdr_netobj *oh)
+{
+	return xdr_decode_netobj(p, oh);
+}
+
+static u32 *
+nlm4_encode_oh(u32 *p, struct xdr_netobj *oh)
+{
+	return xdr_encode_netobj(p, oh);
+}
+
+static u32 *
+nlm4_decode_lock(u32 *p, struct nlm_lock *lock)
+{
+	struct file_lock	*fl = &lock->fl;
+	__s64			len, start, end;
+	int			tmp;
+
+	if (!(p = xdr_decode_string(p, &lock->caller, &tmp, NLM_MAXSTRLEN))
+	 || !(p = nlm4_decode_fh(p, &lock->fh))
+	 || !(p = nlm4_decode_oh(p, &lock->oh)))
+		return NULL;
+
+	memset(fl, 0, sizeof(*fl));
+	fl->fl_owner = current->files;
+	fl->fl_pid   = ntohl(*p++);
+	fl->fl_flags = FL_POSIX;
+	fl->fl_type  = F_RDLCK;		/* as good as anything else */
+	p = xdr_decode_hyper(p, &start);
+	p = xdr_decode_hyper(p, &len);
+	end = start + len - 1;
+
+	fl->fl_start = size_to_off_t(start);
+	fl->fl_end = size_to_off_t(end);
+
+	if (len == 0 || fl->fl_end < 0)
+		fl->fl_end = OFFSET_MAX;
+	return p;
+}
+
+/*
+ * Encode a lock as part of an NLM call
+ */
+static u32 *
+nlm4_encode_lock(u32 *p, struct nlm_lock *lock)
+{
+	struct file_lock	*fl = &lock->fl;
+
+	if (!(p = xdr_encode_string(p, lock->caller, -1))
+	 || !(p = nlm4_encode_fh(p, &lock->fh))
+	 || !(p = nlm4_encode_oh(p, &lock->oh)))
+		return NULL;
+
+	*p++ = htonl(fl->fl_pid);
+	p = xdr_encode_hyper(p, fl->fl_start);
+	if (fl->fl_end == OFFSET_MAX)
+		p = xdr_encode_hyper(p, 0);
+	else
+		p = xdr_encode_hyper(p, fl->fl_end - fl->fl_start + 1);
+
+	return p;
+}
+
+/*
+ * Encode result of a TEST/TEST_MSG call
+ */
+static u32 *
+nlm4_encode_testres(u32 *p, struct nlm_res *resp)
+{
+	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
+		return 0;
+	*p++ = resp->status;
+
+	if (resp->status == nlm_lck_denied) {
+		struct file_lock	*fl = &resp->lock.fl;
+
+		*p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one;
+		*p++ = htonl(fl->fl_pid);
+
+		/* Encode owner handle. */
+		if (!(p = xdr_encode_netobj(p, &resp->lock.oh)))
+			return 0;
+
+		p = xdr_encode_hyper(p, fl->fl_start);
+		if (fl->fl_end == OFFSET_MAX)
+			p = xdr_encode_hyper(p, 0);
+		else
+			p = xdr_encode_hyper(p, fl->fl_end - fl->fl_start + 1);
+	}
+
+	return p;
+}
+
+
+/*
+ * Check buffer bounds after decoding arguments
+ */
+static int
+xdr_argsize_check(struct svc_rqst *rqstp, u32 *p)
+{
+	struct svc_buf	*buf = &rqstp->rq_argbuf;
+
+	return p - buf->base <= buf->buflen;
+}
+
+static int
+xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
+{
+	struct svc_buf	*buf = &rqstp->rq_resbuf;
+
+	buf->len = p - buf->base;
+	return (buf->len <= buf->buflen);
+}
+
+/*
+ * First, the server side XDR functions
+ */
+int
+nlm4svc_decode_testargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
+{
+	u32	exclusive;
+
+	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
+		return 0;
+
+	exclusive = ntohl(*p++);
+	if (!(p = nlm4_decode_lock(p, &argp->lock)))
+		return 0;
+	if (exclusive)
+		argp->lock.fl.fl_type = F_WRLCK;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nlm4svc_encode_testres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
+{
+	if (!(p = nlm4_encode_testres(p, resp)))
+		return 0;
+	return xdr_ressize_check(rqstp, p);
+}
+
+int
+nlm4svc_decode_lockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
+{
+	u32	exclusive;
+
+	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
+		return 0;
+	argp->block  = ntohl(*p++);
+	exclusive    = ntohl(*p++);
+	if (!(p = nlm4_decode_lock(p, &argp->lock)))
+		return 0;
+	if (exclusive)
+		argp->lock.fl.fl_type = F_WRLCK;
+	argp->reclaim = ntohl(*p++);
+	argp->state   = ntohl(*p++);
+	argp->monitor = 1;		/* monitor client by default */
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nlm4svc_decode_cancargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
+{
+	u32	exclusive;
+
+	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
+		return 0;
+	argp->block = ntohl(*p++);
+	exclusive = ntohl(*p++);
+	if (!(p = nlm4_decode_lock(p, &argp->lock)))
+		return 0;
+	if (exclusive)
+		argp->lock.fl.fl_type = F_WRLCK;
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
+{
+	if (!(p = nlm4_decode_cookie(p, &argp->cookie))
+	 || !(p = nlm4_decode_lock(p, &argp->lock)))
+		return 0;
+	argp->lock.fl.fl_type = F_UNLCK;
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nlm4svc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
+{
+	struct nlm_lock	*lock = &argp->lock;
+	int		len;
+
+	memset(lock, 0, sizeof(*lock));
+	lock->fl.fl_pid = ~(u32) 0;
+
+	if (!(p = nlm4_decode_cookie(p, &argp->cookie))
+	 || !(p = xdr_decode_string(p, &lock->caller, &len, NLM_MAXSTRLEN))
+	 || !(p = nlm4_decode_fh(p, &lock->fh))
+	 || !(p = nlm4_decode_oh(p, &lock->oh)))
+		return 0;
+	argp->fsm_mode = ntohl(*p++);
+	argp->fsm_access = ntohl(*p++);
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nlm4svc_encode_shareres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
+{
+	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
+		return 0;
+	*p++ = resp->status;
+	*p++ = xdr_zero;		/* sequence argument */
+	return xdr_ressize_check(rqstp, p);
+}
+
+int
+nlm4svc_encode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
+{
+	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
+		return 0;
+	*p++ = resp->status;
+	return xdr_ressize_check(rqstp, p);
+}
+
+int
+nlm4svc_decode_notify(struct svc_rqst *rqstp, u32 *p, struct nlm_args *argp)
+{
+	struct nlm_lock	*lock = &argp->lock;
+	int		len;
+
+	if (!(p = xdr_decode_string(p, &lock->caller, &len, NLM_MAXSTRLEN)))
+		return 0;
+	argp->state = ntohl(*p++);
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nlm4svc_decode_reboot(struct svc_rqst *rqstp, u32 *p, struct nlm_reboot *argp)
+{
+	if (!(p = xdr_decode_string(p, &argp->mon, &argp->len, SM_MAXSTRLEN)))
+		return 0;
+	argp->state = ntohl(*p++);
+	argp->addr = ntohl(*p++);
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nlm4svc_decode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
+{
+	if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
+		return 0;
+	resp->status = ntohl(*p++);
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nlm4svc_decode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)
+{
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nlm4svc_encode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)
+{
+	return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * Now, the client side XDR functions
+ */
+static int
+nlm4clt_encode_void(struct rpc_rqst *req, u32 *p, void *ptr)
+{
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+static int
+nlm4clt_decode_void(struct rpc_rqst *req, u32 *p, void *ptr)
+{
+	return 0;
+}
+
+static int
+nlm4clt_encode_testargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
+{
+	struct nlm_lock	*lock = &argp->lock;
+
+	if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
+		return -EIO;
+	*p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
+	if (!(p = nlm4_encode_lock(p, lock)))
+		return -EIO;
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+static int
+nlm4clt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
+{
+	if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
+		return -EIO;
+	resp->status = ntohl(*p++);
+	if (resp->status == NLM_LCK_DENIED) {
+		struct file_lock	*fl = &resp->lock.fl;
+		u32			excl;
+		s64			start, end, len;
+
+		memset(&resp->lock, 0, sizeof(resp->lock));
+		excl = ntohl(*p++);
+		fl->fl_pid = ntohl(*p++);
+		if (!(p = nlm4_decode_oh(p, &resp->lock.oh)))
+			return -EIO;
+
+		fl->fl_flags = FL_POSIX;
+		fl->fl_type  = excl? F_WRLCK : F_RDLCK;
+		p = xdr_decode_hyper(p, &start);
+		p = xdr_decode_hyper(p, &len);
+		end = start + len - 1;
+
+		fl->fl_start = size_to_off_t(start);
+		fl->fl_end = size_to_off_t(end);
+		if (len == 0 || fl->fl_end < 0)
+			fl->fl_end = OFFSET_MAX;
+	}
+	return 0;
+}
+
+
+static int
+nlm4clt_encode_lockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
+{
+	struct nlm_lock	*lock = &argp->lock;
+
+	if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
+		return -EIO;
+	*p++ = argp->block? xdr_one : xdr_zero;
+	*p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
+	if (!(p = nlm4_encode_lock(p, lock)))
+		return -EIO;
+	*p++ = argp->reclaim? xdr_one : xdr_zero;
+	*p++ = htonl(argp->state);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+static int
+nlm4clt_encode_cancargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
+{
+	struct nlm_lock	*lock = &argp->lock;
+
+	if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
+		return -EIO;
+	*p++ = argp->block? xdr_one : xdr_zero;
+	*p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
+	if (!(p = nlm4_encode_lock(p, lock)))
+		return -EIO;
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+static int
+nlm4clt_encode_unlockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
+{
+	struct nlm_lock	*lock = &argp->lock;
+
+	if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
+		return -EIO;
+	if (!(p = nlm4_encode_lock(p, lock)))
+		return -EIO;
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+static int
+nlm4clt_encode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
+{
+	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
+		return -EIO;
+	*p++ = resp->status;
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+static int
+nlm4clt_encode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
+{
+	if (!(p = nlm4_encode_testres(p, resp)))
+		return -EIO;
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+static int
+nlm4clt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
+{
+	if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
+		return -EIO;
+	resp->status = ntohl(*p++);
+	return 0;
+}
+
+/*
+ * Buffer requirements for NLM
+ */
+#define NLM4_void_sz		0
+#define NLM4_cookie_sz		3	/* 1 len , 2 data */
+#define NLM4_caller_sz		1+XDR_QUADLEN(NLM_MAXSTRLEN)
+#define NLM4_netobj_sz		1+XDR_QUADLEN(XDR_MAX_NETOBJ)
+/* #define NLM4_owner_sz		1+XDR_QUADLEN(NLM4_MAXOWNER) */
+#define NLM4_fhandle_sz		1+XDR_QUADLEN(NFS3_FHSIZE)
+#define NLM4_lock_sz		5+NLM4_caller_sz+NLM4_netobj_sz+NLM4_fhandle_sz
+#define NLM4_holder_sz		6+NLM4_netobj_sz
+
+#define NLM4_testargs_sz	NLM4_cookie_sz+1+NLM4_lock_sz
+#define NLM4_lockargs_sz	NLM4_cookie_sz+4+NLM4_lock_sz
+#define NLM4_cancargs_sz	NLM4_cookie_sz+2+NLM4_lock_sz
+#define NLM4_unlockargs_sz	NLM4_cookie_sz+NLM4_lock_sz
+
+#define NLM4_testres_sz		NLM4_cookie_sz+1+NLM4_holder_sz
+#define NLM4_res_sz		NLM4_cookie_sz+1
+#define NLM4_norep_sz		0
+
+#ifndef MAX
+# define MAX(a,b)		(((a) > (b))? (a) : (b))
+#endif
+
+/*
+ * For NLM, a void procedure really returns nothing
+ */
+#define nlm4clt_decode_norep	NULL
+
+#define PROC(proc, argtype, restype)				\
+    { "nlm4_" #proc,						\
+      (kxdrproc_t) nlm4clt_encode_##argtype,			\
+      (kxdrproc_t) nlm4clt_decode_##restype,			\
+      MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2,	\
+      0								\
+    }
+
+static struct rpc_procinfo	nlm4_procedures[] = {
+    PROC(null,		void,		void),
+    PROC(test,		testargs,	testres),
+    PROC(lock,		lockargs,	res),
+    PROC(canc,		cancargs,	res),
+    PROC(unlock,	unlockargs,	res),
+    PROC(granted,	testargs,	res),
+    PROC(test_msg,	testargs,	norep),
+    PROC(lock_msg,	lockargs,	norep),
+    PROC(canc_msg,	cancargs,	norep),
+    PROC(unlock_msg,	unlockargs,	norep),
+    PROC(granted_msg,	testargs,	norep),
+    PROC(test_res,	testres,	norep),
+    PROC(lock_res,	res,		norep),
+    PROC(canc_res,	res,		norep),
+    PROC(unlock_res,	res,		norep),
+    PROC(granted_res,	res,		norep),
+    PROC(undef,		void,		void),
+    PROC(undef,		void,		void),
+    PROC(undef,		void,		void),
+    PROC(undef,		void,		void),
+#ifdef NLMCLNT_SUPPORT_SHARES
+    PROC(share,		shareargs,	shareres),
+    PROC(unshare,	shareargs,	shareres),
+    PROC(nm_lock,	lockargs,	res),
+    PROC(free_all,	notify,		void),
+#else
+    PROC(undef,		void,		void),
+    PROC(undef,		void,		void),
+    PROC(undef,		void,		void),
+    PROC(undef,		void,		void),
+#endif
+};
+
+struct rpc_version	nlm_version4 = {
+	4, 24, nlm4_procedures,
+};
--- linux/fs/nfsd/export.c.nfsattack-gafton	Tue Oct 26 20:53:42 1999
+++ linux/fs/nfsd/export.c	Fri Feb  4 23:26:34 2000
@@ -366,16 +366,6 @@
 				exp->ex_parent = unexp->ex_parent;
 	}
 
-	/*
-	 * Check whether this is the last export for this device,
-	 * and if so flush any cached dentries.
-	 */
-	if (!exp_device_in_use(unexp->ex_dev)) {
-printk("exp_do_unexport: %s last use, flushing cache\n",
-	kdevname(unexp->ex_dev));
-		nfsd_fh_flush(unexp->ex_dev);
-	}
-
 	dentry = unexp->ex_dentry;
 	inode = dentry->d_inode;
 	if (unexp->ex_dev != inode->i_dev || unexp->ex_ino != inode->i_ino)
@@ -628,7 +618,8 @@
 	{ NFSEXP_UIDMAP, {"uidmap", ""}},
 	{ NFSEXP_KERBEROS, { "kerberos", ""}},
 	{ NFSEXP_SUNSECURE, { "sunsecure", ""}},
-	{ NFSEXP_CROSSMNT, {"crossmnt", ""}},
+	{ NFSEXP_CROSSMNT, {"nohide", ""}},
+ 	{ NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
 	{ 0, {"", ""}}
 };
 
--- linux/fs/nfsd/nfsfh.c.nfsattack-gafton	Fri Feb  4 23:26:28 2000
+++ linux/fs/nfsd/nfsfh.c	Fri Feb  4 23:29:43 2000
@@ -5,6 +5,7 @@
  *
  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
  * Portions Copyright (C) 1999 G. Allen Morris III <gam3@acm.org>
+ * Extensive cleanup by Neil Brown <neilb@cse.unsw.edu.au> Southern-Spring 1999
  */
 
 #include <linux/sched.h>
@@ -22,334 +23,49 @@
 #define NFSDDBG_FACILITY		NFSDDBG_FH
 #define NFSD_PARANOIA 1
 /* #define NFSD_DEBUG_VERBOSE 1 */
-/* #define NFSD_DEBUG_VERY_VERBOSE 1 */
 
-extern unsigned long max_mapnr;
-
-#define NFSD_FILE_CACHE 0
-#define NFSD_DIR_CACHE  1
-struct fh_entry {
-	struct dentry * dentry;
-	unsigned long reftime;
-	ino_t	ino;
-	kdev_t	dev;
-};
-
-#define NFSD_MAXFH \
-  (((nfsd_nservers + 1) >> 1) * PAGE_SIZE/sizeof(struct fh_entry))
-static struct fh_entry *filetable = NULL;
-static struct fh_entry *dirstable = NULL;
 
 static int nfsd_nr_verified = 0;
 static int nfsd_nr_put = 0;
-static unsigned long nfsd_next_expire = 0;
-
-static int add_to_fhcache(struct dentry *, int);
-struct dentry * lookup_inode(kdev_t, ino_t, ino_t);
-
-static LIST_HEAD(fixup_head);
-static LIST_HEAD(path_inuse);
-static int nfsd_nr_fixups = 0;
-static int nfsd_nr_paths = 0;
-#define NFSD_MAX_PATHS 500
-#define NFSD_MAX_FIXUPS 500
-#define NFSD_MAX_FIXUP_AGE 30*HZ
-
-struct nfsd_fixup {
-	struct list_head lru;
-	unsigned long reftime;
-	ino_t	dirino;
-	ino_t	ino;
-	kdev_t	dev;
-	ino_t	new_dirino;
-};
-
-struct nfsd_path {
-	struct list_head lru;
-	unsigned long reftime;
-	int	users;
-	ino_t	ino;
-	kdev_t	dev;
-	char	name[1];
-};
-
-static struct nfsd_fixup *
-find_cached_lookup(kdev_t dev, ino_t dirino, ino_t ino)
-{
-	struct list_head *tmp = fixup_head.next;
-
-	for (; tmp != &fixup_head; tmp = tmp->next) {
-		struct nfsd_fixup *fp;
-
-		fp = list_entry(tmp, struct nfsd_fixup, lru);
-#ifdef NFSD_DEBUG_VERY_VERBOSE
-printk("fixup %lu %lu, %lu %lu %s %s\n",
-        fp->ino, ino,
-	fp->dirino, dirino,
-	kdevname(fp->dev), kdevname(dev));
-#endif
-		if (fp->ino != ino)
-			continue;
-		if (fp->dirino != dirino)
-			continue;
-		if (fp->dev != dev)
-			continue;
-		fp->reftime = jiffies;	
-		list_del(tmp);
-		list_add(tmp, &fixup_head);
-		return fp;
-	}
-	return NULL;
-}
-
-/*
- * Save the dirino from a rename.
- */
-void
-add_to_rename_cache(ino_t new_dirino,
-                    kdev_t dev, ino_t dirino, ino_t ino)
-{
-	struct nfsd_fixup *fp;
-
-	if (dirino == new_dirino)
-		return;
-
-	fp = find_cached_lookup(dev, 
-				dirino,
-				ino);
-	if (fp) {
-		fp->new_dirino = new_dirino;
-		return;
-	}
-
-	/*
-	 * Add a new entry. The small race here is unimportant:
-	 * if another task adds the same lookup, both entries
-	 * will be consistent.
-	 */
-	fp = kmalloc(sizeof(struct nfsd_fixup), GFP_KERNEL);
-	if (fp) {
-		fp->dirino = dirino;
-		fp->ino = ino;
-		fp->dev = dev;
-		fp->new_dirino = new_dirino;
-		list_add(&fp->lru, &fixup_head);
-		nfsd_nr_fixups++;
-	}
-}
-
-/*
- * Save the dentry pointer from a successful lookup.
- */
-
-static void free_fixup_entry(struct nfsd_fixup *fp)
-{
-	list_del(&fp->lru);
-#ifdef NFSD_DEBUG_VERY_VERBOSE
-printk("free_rename_entry: %lu->%lu %lu/%s\n",
-		fp->dirino,
-		fp->new_dirino,
-		fp->ino,
-		kdevname(fp->dev),
-		(jiffies - fp->reftime));
-#endif
-	kfree(fp);
-	nfsd_nr_fixups--;
-}
-
-/*
- * Copy a dentry's path into the specified buffer.
- */
-static int copy_path(char *buffer, struct dentry *dentry, int namelen)
-{
-	char *p, *b = buffer;
-	int result = 0, totlen = 0, len; 
-
-	while (1) {
-		struct dentry *parent;
-		dentry = dentry->d_covers;
-		parent = dentry->d_parent;
-		len = dentry->d_name.len;
-		p = (char *) dentry->d_name.name + len;
-		totlen += len;
-		if (totlen > namelen)
-			goto out;
-		while (len--)
-			*b++ = *(--p);
-		if (dentry == parent)
-			break;
-		dentry = parent;
-		totlen++;
-		if (totlen > namelen)
-			goto out;
-		*b++ = '/';
-	}
-	*b = 0;
-
-	/*
-	 * Now reverse in place ...
-	 */
-	p = buffer;
-	while (p < b) {
-		char c = *(--b);
-		*b = *p;
-		*p++ = c;
-	} 
-	result = 1;
-out:
-	return result;
-}
-
-/*
- * Add a dentry's path to the path cache.
- */
-static int add_to_path_cache(struct dentry *dentry)
-{
-	struct inode *inode = dentry->d_inode;
-	struct dentry *this;
-	struct nfsd_path *new;
-	int len, result = 0;
-
-#ifdef NFSD_DEBUG_VERBOSE
-printk("add_to_path_cache: caching %s/%s\n",
-dentry->d_parent->d_name.name, dentry->d_name.name);
-#endif
-	/*
-	 * Get the length of the full pathname.
-	 */
-restart:
-	len = 0;
-	this = dentry;
-	while (1) {
-		struct dentry *parent;
-		this = this->d_covers;
-		parent = this->d_parent;
-		len += this->d_name.len;
-		if (this == parent)
-			break;
-		this = parent;
-		len++;
-	}
-	/*
-	 * Allocate a structure to hold the path.
-	 */
-	new = kmalloc(sizeof(struct nfsd_path) + len, GFP_KERNEL);
-	if (new) {
-		new->users = 0;	
-		new->reftime = jiffies;	
-		new->ino = inode->i_ino;
-		new->dev = inode->i_dev;
-		result = copy_path(new->name, dentry, len);
-		if (!result)
-			goto retry;
-		list_add(&new->lru, &path_inuse);
-		nfsd_nr_paths++;
-#ifdef NFSD_DEBUG_VERBOSE
-printk("add_to_path_cache: added %s, paths=%d\n", new->name, nfsd_nr_paths);
-#endif
-	}
-	return result;
-
-	/*
-	 * If the dentry's path length changed, just try again.
-	 */
-retry:
-	kfree(new);
-	printk(KERN_DEBUG "add_to_path_cache: path length changed, retrying\n");
-	goto restart;
-}
 
-/*
- * Search for a path entry for the specified (dev, inode).
- */
-static struct nfsd_path *get_path_entry(kdev_t dev, ino_t ino)
-{
-	struct nfsd_path *pe;
-	struct list_head *tmp;
-
-	for (tmp = path_inuse.next; tmp != &path_inuse; tmp = tmp->next) {
-		pe = list_entry(tmp, struct nfsd_path, lru);
-		if (pe->ino != ino)
-			continue;
-		if (pe->dev != dev)
-			continue;
-		list_del(tmp);
-		list_add(tmp, &path_inuse);
-		pe->users++;
-		pe->reftime = jiffies;
-#ifdef NFSD_PARANOIA
-printk("get_path_entry: found %s for %s/%ld\n", pe->name, kdevname(dev), ino);
-#endif
-		return pe;
-	}
-	return NULL;
-}
-
-static void put_path(struct nfsd_path *pe)
-{
-	pe->users--;
-}
-
-static void free_path_entry(struct nfsd_path *pe)
-{
-	if (pe->users)
-		printk(KERN_DEBUG "free_path_entry: %s in use, users=%d\n",
-			pe->name, pe->users);
-	list_del(&pe->lru);
-	kfree(pe);
-	nfsd_nr_paths--;
-}
 
 struct nfsd_getdents_callback {
-	struct nfsd_dirent *dirent;
-	ino_t dirino;		/* parent inode number */
-	int found;		/* dirent inode matched? */
+	struct qstr *name;	/* name that was found. name->name already points to a buffer */
+	unsigned long ino;	/* the inum we are looking for */
+	int found;		/* inode matched? */
 	int sequence;		/* sequence counter */
 };
 
-struct nfsd_dirent {
-	ino_t ino;		/* preset to desired entry */
-	int len;
-	char name[256];
-};
-
 /*
- * A rather strange filldir function to capture the inode number
- * for the second entry (the parent inode) and the name matching
- * the specified inode number.
+ * A rather strange filldir function to capture
+ * the name matching the specified inode number.
  */
-static int filldir_one(void * __buf, const char * name, int len, 
+static int filldir_one(void * __buf, const char * name, int len,
 			off_t pos, ino_t ino)
 {
 	struct nfsd_getdents_callback *buf = __buf;
-	struct nfsd_dirent *dirent = buf->dirent;
+	struct qstr *qs = buf->name;
+	char *nbuf = (char*)qs->name; /* cast is to get rid of "const" */
 	int result = 0;
 
 	buf->sequence++;
-#ifdef NFSD_DEBUG_VERY_VERBOSE
-printk("filldir_one: seq=%d, ino=%lu, name=%s\n", buf->sequence, ino, name);
+#ifdef NFSD_DEBUG_VERBOSE
+dprintk("filldir_one: seq=%d, ino=%ld, name=%s\n", buf->sequence, ino, name);
 #endif
-	if (buf->sequence == 2) {
-		buf->dirino = ino;
-		goto out;
-	}
-	if (dirent->ino == ino) {
-		dirent->len = len;
-		memcpy(dirent->name, name, len);
-		dirent->name[len] = '\0';
+	if (buf->ino == ino) {
+		qs->len = len;
+		memcpy(nbuf, name, len);
+		nbuf[len] = '\0';
 		buf->found = 1;
 		result = -1;
 	}
-out:
 	return result;
 }
 
 /*
- * Read a directory and return the parent inode number and the name
- * of the specified entry. The dirent must be initialized with the
- * inode number of the desired entry.
+ * Read a directory and return the name of the specified entry.
  */
-static int get_parent_ino(struct dentry *dentry, struct nfsd_dirent *dirent)
+static int get_ino_name(struct dentry *dentry, struct qstr *name, unsigned long ino)
 {
 	struct inode *dir = dentry->d_inode;
 	int error;
@@ -372,8 +88,8 @@
 	if (!file.f_op->readdir)
 		goto out_close;
 
-	buffer.dirent = dirent;
-	buffer.dirino = 0;
+	buffer.name = name;
+	buffer.ino = ino;
 	buffer.found = 0;
 	buffer.sequence = 0;
 	while (1) {
@@ -391,7 +107,6 @@
 		if (old_seq == buffer.sequence)
 			break;
 	}
-	dirent->ino = buffer.dirino;
 
 out_close:
 	if (file.f_op->release)
@@ -400,707 +115,251 @@
 	return error;
 }
 
-/*
- * Look up a dentry given inode and parent inode numbers.
- *
- * This relies on the ability of a Unix-like filesystem to return
- * the parent inode of a directory as the ".." (second) entry.
- *
- * This could be further optimized if we had an efficient way of
- * searching for a dentry given the inode: as we walk up the tree,
- * it's likely that a dentry exists before we reach the root.
- */
-struct dentry * lookup_inode(kdev_t dev, ino_t dirino, ino_t ino)
-{
-	struct super_block *sb;
-	struct dentry *root, *dentry, *result;
-	struct inode *dir;
-	char *name;
-	unsigned long page;
-	ino_t root_ino;
-	int error;
-	struct nfsd_dirent dirent;
-
-	result = ERR_PTR(-ENOMEM);
-	page = __get_free_page(GFP_KERNEL);
-	if (!page)
-		goto out;
-
-	/*
-	 * Get the root dentry for the device.
-	 */
-	result = ERR_PTR(-ENOENT);
-	sb = get_super(dev);
-	if (!sb)
-		goto out_page;
-	root = dget(sb->s_root);
-	root_ino = root->d_inode->i_ino; /* usually 2 */
-
-	name = (char *) page + PAGE_SIZE;
-	*(--name) = 0;
-
-	/*
-	 * Walk up the tree to construct the name string.
-	 * When we reach the root inode, look up the name
-	 * relative to the root dentry.
-	 */
-	while (1) {
-		if (ino == root_ino) {
-			if (*name == '/')
-				name++;
-			/*
-			 * Note: this dput()s the root dentry.
-			 */
-			result = lookup_dentry(name, root, 0);
-			break;
-		}
-
-		/*
-		 *  Fix for /// bad export bug: if dirino is the root,
-		 *  get the real root dentry rather than creating a temporary
-		 *  "root" dentry.  XXX We could extend this to use
-		 *  any existing dentry for the located 'dir', but all
-		 *  of this code is going to be completely rewritten soon,
-		 *  so I won't bother. 
-		 */
-
-		if (dirino == root_ino) {
-			dentry = dget(root);
-		}
-		else {
-			result = ERR_PTR(-ENOENT);
-			dir = iget_in_use(sb, dirino);
-			if (!dir)
-				goto out_root;
-			dentry = d_alloc_root(dir, NULL);
-			if (!dentry)
-				goto out_iput;
-		}
-
-		/*
-		 * Get the name for this inode and the next parent inode.
-		 */
-		dirent.ino = ino;
-		error = get_parent_ino(dentry, &dirent);
-		result = ERR_PTR(error);
-		dput(dentry);
-		if (error)
-			goto out_root;
-		/*
-		 * Prepend the name to the buffer.
-		 */
-		result = ERR_PTR(-ENAMETOOLONG);
-		name -= (dirent.len + 1);
-		if ((unsigned long) name <= page)
-			goto out_root;
-		memcpy(name + 1, dirent.name, dirent.len);
-		*name = '/';
-
-		/*
-		 * Make sure we can't get caught in a loop ...
-		 */
-		if (dirino == dirent.ino && dirino != root_ino) {
-			printk(KERN_DEBUG 
-			       "lookup_inode: looping?? (ino=%ld, path=%s)\n",
-				dirino, name);	
-			goto out_root;
-		}
-		ino = dirino;
-		dirino = dirent.ino;
-	}
-
-out_page:
-	free_page(page);
-out:
-	return result;
-
-	/*
-	 * Error exits ...
-	 */
-out_iput:
-	result = ERR_PTR(-ENOMEM);
-	iput(dir);
-out_root:
-	dput(root);
-	goto out_page;
-}
-
-/*
- * Find an entry in the cache matching the given dentry pointer.
- */
-static struct fh_entry *find_fhe(struct dentry *dentry, int cache,
-				struct fh_entry **empty)
-{
-	struct fh_entry *fhe;
-	int i, found = (empty == NULL) ? 1 : 0;
-
-	if (!dentry)
-		goto out;
-
-	fhe = (cache == NFSD_FILE_CACHE) ? &filetable[0] : &dirstable[0];
-	for (i = 0; i < NFSD_MAXFH; i++, fhe++) {
-		if (fhe->dentry == dentry) {
-			fhe->reftime = jiffies;
-			return fhe;
-		}
-		if (!found && !fhe->dentry) {
-			found = 1;
-			*empty = fhe;
-		}
-	}
-out:
-	return NULL;
-}
-
-/*
- * Expire a cache entry.
- */
-static void expire_fhe(struct fh_entry *empty, int cache)
-{
-	struct dentry *dentry = empty->dentry;
-
-#ifdef NFSD_DEBUG_VERBOSE
-printk("expire_fhe: expiring %s %s/%s, d_count=%d, ino=%lu\n",
-(cache == NFSD_FILE_CACHE) ? "file" : "dir",
-dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_count,empty->ino);
-#endif
-	empty->dentry = NULL;	/* no dentry */
-	/*
-	 * Add the parent to the dir cache before releasing the dentry,
-	 * and check whether to save a copy of the dentry's path.
-	 */
-	if (dentry != dentry->d_parent) {
-		struct dentry *parent = dget(dentry->d_parent);
-		if (add_to_fhcache(parent, NFSD_DIR_CACHE))
-			nfsd_nr_verified++;
-		else
-			dput(parent);
-		/*
-		 * If we're expiring a directory, copy its path.
-		 */
-		if (cache == NFSD_DIR_CACHE) {
-			add_to_path_cache(dentry);
-		}
-	}
-	dput(dentry);
-	nfsd_nr_put++;
-}
-
-/*
- * Look for an empty slot, or select one to expire.
- */
-static void expire_slot(int cache)
-{
-	struct fh_entry *fhe, *empty = NULL;
-	unsigned long oldest = -1;
-	int i;
-
-	fhe = (cache == NFSD_FILE_CACHE) ? &filetable[0] : &dirstable[0];
-	for (i = 0; i < NFSD_MAXFH; i++, fhe++) {
-		if (!fhe->dentry)
-			goto out;
-		if (fhe->reftime < oldest) {
-			oldest = fhe->reftime;
-			empty = fhe;
-		}
-	}
-	if (empty)
-		expire_fhe(empty, cache);
-
-out:
-	return;
-}
-
-/*
- * Expire any cache entries older than a certain age.
+/* this should be provided by each filesystem in an nfsd_operations interface as
+ * iget isn't really the right interface
  */
-static void expire_old(int cache, int age)
+static inline struct dentry *nfsd_iget(struct super_block *sb, unsigned long ino, __u32 generation)
 {
-	struct fh_entry *fhe;
-	int i;
-
-#ifdef NFSD_DEBUG_VERY_VERBOSE
-printk("expire_old: expiring %s older than %d\n",
-(cache == NFSD_FILE_CACHE) ? "file" : "dir", age);
-#endif
-	fhe = (cache == NFSD_FILE_CACHE) ? &filetable[0] : &dirstable[0];
-	for (i = 0; i < NFSD_MAXFH; i++, fhe++) {
-		if (!fhe->dentry)
-			continue;
-		if ((jiffies - fhe->reftime) > age)
-			expire_fhe(fhe, cache);
-	}
 
-	/*
-	 * Trim the fixup cache ...
+	/* iget isn't really right if the inode is currently unallocated!!
+	 * This should really all be done inside each filesystem
+	 *
+	 * ext2fs' read_inode has been strengthened to return a bad_inode if the inode
+	 *   had been deleted.
+	 *
+	 * Currently we don't know the generation for parent directory, so a generation
+	 * of 0 means "accept any"
 	 */
-	while (nfsd_nr_fixups > NFSD_MAX_FIXUPS) {
-		struct nfsd_fixup *fp;
-		fp = list_entry(fixup_head.prev, struct nfsd_fixup, lru);
-		if ((jiffies - fp->reftime) < NFSD_MAX_FIXUP_AGE)
-			break;
-		free_fixup_entry(fp);
-	}
-
-	/*
-	 * Trim the path cache ...
-	 */
-	while (nfsd_nr_paths > NFSD_MAX_PATHS) {
-		struct nfsd_path *pe;
-		pe = list_entry(path_inuse.prev, struct nfsd_path, lru);
-		if (pe->users)
-			break;
-		free_path_entry(pe);
-	}
-}
-
-/*
- * Add a dentry to the file or dir cache.
- *
- * Note: As NFS file handles must have an inode, we don't accept
- * negative dentries.
- */
-static int add_to_fhcache(struct dentry *dentry, int cache)
-{
-	struct fh_entry *fhe, *empty = NULL;
-	struct inode *inode = dentry->d_inode;
-
+	struct inode *inode;
+	struct list_head *lp;
+	struct dentry *result;
+	inode = iget_in_use(sb, ino);
 	if (!inode) {
-#ifdef NFSD_PARANOIA
-printk("add_to_fhcache: %s/%s rejected, no inode!\n",
-dentry->d_parent->d_name.name, dentry->d_name.name);
-#endif
-		return 0;
-	}
-
-repeat:
-	fhe = find_fhe(dentry, cache, &empty);
-	if (fhe) {
-		return 0;
-	}
-
-	/*
-	 * Not found ... make a new entry.
-	 */
-	if (empty) {
-		empty->dentry = dentry;
-		empty->reftime = jiffies;
-		empty->ino = inode->i_ino;
-		empty->dev = inode->i_dev;
-		return 1;
-	}
-
-	expire_slot(cache);
-	goto repeat;
-}
-
-/*
- * Find an entry in the dir cache for the specified inode number.
- */
-static struct fh_entry *find_fhe_by_ino(kdev_t dev, ino_t ino)
-{
-	struct fh_entry * fhe = &dirstable[0];
-	int i;
+		dprintk("nfsd_iget: failed to find ino: %lu on %s\n",
+			ino, bdevname(sb->s_dev));
+		return ERR_PTR(-ENOENT);
+	}
+	if (is_bad_inode(inode)
+	    || (generation && inode->i_generation != generation)
+		) {
+		/* we didn't find the right inode.. */
+		dprintk("nfsd_iget: %s: Inode %lu, Bad count: %d, %d or version %u, %u\n",
+			bdevname(inode->i_sb->s_dev),
+			inode->i_ino,
+			inode->i_nlink, inode->i_count,
+			inode->i_generation,
+			generation);
 
-	for (i = 0; i < NFSD_MAXFH; i++, fhe++) {
-		if (fhe->ino == ino && fhe->dev == dev) {
-			fhe->reftime = jiffies;
-			return fhe;
-		}
+		iput(inode);
+		return NULL;
 	}
-	return NULL;
-}
-
-/*
- * Find the (directory) dentry with the specified (dev, inode) number.
- * Note: this leaves the dentry in the cache.
- */
-static struct dentry *find_dentry_by_ino(kdev_t dev, ino_t ino)
-{
-	struct fh_entry *fhe;
-	struct nfsd_path *pe;
-	struct dentry * dentry;
-
-#ifdef NFSD_DEBUG_VERBOSE
-printk("find_dentry_by_ino: looking for inode %ld\n", ino);
-#endif
-	/*
-	 * Special case: inode number 2 is the root inode,
-	 * so we can use the root dentry for the device.
+	/* now to find a dentry.
+	 * If possible, get a well-connected one
 	 */
-	if (ino == 2) {
-		struct super_block *sb = get_super(dev);
-		if (sb) {
-#ifdef NFSD_PARANOIA
-printk("find_dentry_by_ino: getting root dentry for %s\n", kdevname(dev));
-#endif
-			if (sb->s_root) {
-				dentry = dget(sb->s_root);
-				goto out;
-			} else {
-#ifdef NFSD_PARANOIA
-				printk("find_dentry_by_ino: %s has no root??\n",
-					kdevname(dev));
-#endif
-			}
+	for (lp = inode->i_dentry.next; lp != &inode->i_dentry ; lp=lp->next) {
+		result = list_entry(lp,struct dentry, d_alias);
+		if (! IS_ROOT(result) || inode->i_sb->s_root == result) {
+			dget(result);
+			iput(inode);
+			return result;
 		}
 	}
-
-	/*
-	 * Search the dentry cache ...
-	 */
-	fhe = find_fhe_by_ino(dev, ino);
-	if (fhe) {
-		dentry = dget(fhe->dentry);
-		goto out;
-	}
-	/*
-	 * Search the path cache ...
-	 */
-	dentry = NULL;
-	pe = get_path_entry(dev, ino);
-	if (pe) {
-		struct dentry *res;
-		res = lookup_dentry(pe->name, NULL, 0);
-		if (!IS_ERR(res)) {
-			struct inode *inode = res->d_inode;
-			if (inode && inode->i_ino == ino &&
-				     inode->i_dev == dev) {
-				dentry = res;
-#ifdef NFSD_PARANOIA
-printk("find_dentry_by_ino: found %s/%s, ino=%ld\n",
-dentry->d_parent->d_name.name, dentry->d_name.name, ino);
-#endif
-				if (add_to_fhcache(dentry, NFSD_DIR_CACHE)) {
-					dget(dentry);
-					nfsd_nr_verified++;
-				}
-				put_path(pe);
-			} else {
-				dput(res);
-				put_path(pe);
-				/* We should delete it from the cache. */
-				free_path_entry(pe);
-			}
-		} else {
-#ifdef NFSD_PARANOIA
-printk("find_dentry_by_ino: %s lookup failed\n", pe->name);
-#endif
-			put_path(pe);
-			/* We should delete it from the cache. */
-			free_path_entry(pe);
-		}
+	result = d_alloc_root(inode, NULL);
+	if (result == NULL) {
+		iput(inode);
+		return ERR_PTR(-ENOMEM);
 	}
-out:
-	return dentry;
+	d_rehash(result); /* so a dput won't loose it */
+	return result;
 }
 
-/*
- * Look for an entry in the file cache matching the dentry pointer,
- * and verify that the (dev, inode) numbers are correct. If found,
- * the entry is removed from the cache.
+/* this routine links an IS_ROOT dentry into the dcache tree.  It gains "parent"
+ * as a parent and "name" as a name
+ * It should possibly go in dcache.c
  */
-static struct dentry *find_dentry_in_fhcache(struct knfs_fh *fh)
+int d_splice(struct dentry *target, struct dentry *parent, struct qstr *name)
 {
-/* FIXME: this must use the dev/ino/dir_ino triple. */ 
-#if 0
-	struct fh_entry * fhe;
-
-	fhe = find_fhe(fh->fh_dcookie, NFSD_FILE_CACHE, NULL);
-	if (fhe) {
-		struct dentry *parent, *dentry;
-		struct inode *inode;
-
-		dentry = fhe->dentry;
-		inode = dentry->d_inode;
-
-		if (!inode) {
+	struct dentry *tdentry;
 #ifdef NFSD_PARANOIA
-printk("find_dentry_in_fhcache: %s/%s has no inode!\n",
-dentry->d_parent->d_name.name, dentry->d_name.name);
-#endif
-			goto out;
-		}
-		if (inode->i_ino != u32_to_ino_t(fh->fh_ino))
-			goto out;
- 		if (inode->i_dev != u32_to_kdev_t(fh->fh_dev))
-			goto out;
-
-		fhe->dentry = NULL;
-		fhe->ino = 0;
-		fhe->dev = 0;
-		nfsd_nr_put++;
-		/*
-		 * Make sure the parent is in the dir cache ...
-		 */
-		parent = dget(dentry->d_parent);
-		if (add_to_fhcache(parent, NFSD_DIR_CACHE))
-			nfsd_nr_verified++;
-		else
-			dput(parent);
-		return dentry;
-	}
-out:
-#endif
-	return NULL;
-}
-
-/*
- * Look for an entry in the parent directory with the specified
- * inode number.
- */
-static struct dentry *lookup_by_inode(struct dentry *parent, ino_t ino)
-{
-	struct dentry *dentry;
-	int error;
-	struct nfsd_dirent dirent;
-
-	/*
-	 * Search the directory for the inode number.
-	 */
-	dirent.ino = ino;
-	error = get_parent_ino(parent, &dirent);
-	if (error) {
-#ifdef NFSD_PARANOIA_EXTREME
-printk("lookup_by_inode: ino %ld not found in %s\n", ino, parent->d_name.name);
-#endif
-		goto no_entry;
-	}
-#ifdef NFSD_PARANOIA_EXTREME
-printk("lookup_by_inode: found %s\n", dirent.name);
-#endif
-
-	dentry = lookup_dentry(dirent.name, parent, 0);
-	if (!IS_ERR(dentry)) {
-		if (dentry->d_inode && dentry->d_inode->i_ino == ino)
-			goto out;
-#ifdef NFSD_PARANOIA_EXTREME
-printk("lookup_by_inode: %s/%s inode mismatch??\n",
-parent->d_name.name, dentry->d_name.name);
+	if (!IS_ROOT(target))
+		printk("nfsd: d_splice with no-root target: %s/%s\n", parent->d_name.name, name->name);
 #endif
-		dput(dentry);
-	} else {
-#ifdef NFSD_PARANOIA_EXTREME
-printk("lookup_by_inode: %s lookup failed, error=%ld\n",
-dirent.name, PTR_ERR(dentry));
-#endif
-	}
-
-no_entry:
-	dentry = NULL;
-out:
-	return dentry;
-}
-
-/*
- * Search the fix-up list for a dentry from a prior lookup.
- */
-static ino_t nfsd_cached_lookup(struct knfs_fh *fh)
-{
-	struct nfsd_fixup *fp;
-
-	fp = find_cached_lookup(u32_to_kdev_t(fh->fh_dev),
-				u32_to_ino_t(fh->fh_dirino),
-				u32_to_ino_t(fh->fh_ino));
-	if (fp)
-		return fp->new_dirino;
+	name->hash = full_name_hash(name->name, name->len);
+	tdentry = d_alloc(parent, name);
+	if (tdentry == NULL)
+		return -ENOMEM;
+	d_move(target, tdentry);
+
+	/* tdentry will have been made a "child" of target (the parent of target)
+	 * make it an IS_ROOT instead
+	 */
+	list_del(&tdentry->d_child);
+	tdentry->d_parent = tdentry;
+	d_rehash(target);
+	dput(tdentry);
 	return 0;
 }
 
-void
-expire_all(void)
-{
- 	if (time_after_eq(jiffies, nfsd_next_expire)) {
- 		expire_old(NFSD_FILE_CACHE,  5*HZ);
- 		expire_old(NFSD_DIR_CACHE , 60*HZ);
- 		nfsd_next_expire = jiffies + 5*HZ;
- 	}
-}
-
-/* 
- * Free cache after unlink/rmdir.
- */
-void
-expire_by_dentry(struct dentry *dentry)
-{
-	struct fh_entry *fhe;
-
-	fhe = find_fhe(dentry, NFSD_FILE_CACHE, NULL);
-	if (fhe) {
-		expire_fhe(fhe, NFSD_FILE_CACHE);
-	}
-	fhe = find_fhe(dentry, NFSD_DIR_CACHE, NULL);
-	if (fhe) {
-		expire_fhe(fhe, NFSD_DIR_CACHE);
-	}
+/* Find the parent of directory "child" by asking its filesystem to look up
+ * "..".  Returns whatever lookup returns if non-NULL, else a held alias of
+ * the parent inode or a fresh root dentry for it, else an ERR_PTR.
+ */
+struct dentry *nfsd_findparent(struct dentry *child)
+{
+	struct dentry *tdentry, *pdentry;
+	tdentry = d_alloc(child, &(const struct qstr) {"..", 2, 0});
+	if (!tdentry)
+		return ERR_PTR(-ENOMEM);
+
+	/* I'm going to assume that if the returned dentry is different, then
+	 * it is well connected.  But nobody returns different dentrys do they?
+	 */
+	pdentry = child->d_inode->i_op->lookup(child->d_inode, tdentry);
+	d_drop(tdentry); /* lookup may have hashed it; never keep ".." hashed */
+	if (!pdentry && !tdentry->d_inode) /* no ".." found: don't oops below */
+		pdentry = ERR_PTR(-EINVAL);
+	if (!pdentry) {
+		/* Don't return the ".." dentry itself: prefer another alias
+		 * of the parent inode (first or last on the alias list),
+		 * else make an unconnected IS_ROOT dentry.
+		 */
+		struct list_head *aliases = &tdentry->d_inode->i_dentry;
+		if (aliases->next != aliases) {
+			pdentry = list_entry(aliases->next, struct dentry, d_alias);
+			if (pdentry == tdentry)
+				pdentry = list_entry(aliases->prev, struct dentry, d_alias);
+			if (pdentry == tdentry)
+				pdentry = NULL;
+			if (pdentry) dget(pdentry);
+		}
+		if (pdentry == NULL) {
+			pdentry = d_alloc_root(igrab(tdentry->d_inode),NULL);
+			if (pdentry) d_rehash(pdentry);
+		}
+		if (pdentry == NULL)
+			pdentry = ERR_PTR(-ENOMEM);
+	}
+	dput(tdentry); /* unhashed above, so it will be discarded */
+	return pdentry;
 }
 
 /*
- * The is the basic lookup mechanism for turning an NFS file handle 
- * into a dentry. There are several levels to the search:
- * (1) Look for the dentry pointer the short-term fhcache,
- *     and verify that it has the correct inode number.
- *
- * (2) Try to validate the dentry pointer in the file handle,
- *     and verify that it has the correct inode number. If this
- *     fails, check for a cached lookup in the fix-up list and
- *     repeat step (2) using the new dentry pointer.
- *
- * (3) Look up the dentry by using the inode and parent inode numbers
- *     to build the name string. This should succeed for any Unix-like
- *     filesystem.
- *
- * (4) Search for the parent dentry in the dir cache, and then
- *     look for the name matching the inode number.
- *
- * (5) The most general case ... search the whole volume for the inode.
- *
- * If successful, we return a dentry with the use count incremented.
- *
- * Note: steps (4) and (5) above are probably unnecessary now that (3)
- * is working. Remove the code once this is verified ...
+ * This is the basic lookup mechanism for turning an NFS file handle
+ * into a dentry.
+ * We use nfsd_iget and if that doesn't return a suitably connected dentry,
+ * we try to find the parent, and the parent of that and so-on until a
+ * connection is made.
  */
 static struct dentry *
-find_fh_dentry(struct knfs_fh *fh)
+find_fh_dentry(struct super_block *sb, struct knfs_fh *fh, int needpath)
 {
-	struct super_block *sb;
-	struct dentry *dentry, *parent;
-	struct inode * inode;
-	struct list_head *lst;
-	int looked_up = 0, retry = 0;
-	ino_t dirino;
-
-	/*
-	 * Stage 1: Look for the dentry in the short-term fhcache.
-	 */
-	dentry = find_dentry_in_fhcache(fh);
-	if (dentry) {
-		nfsdstats.fh_cached++;
-		goto out;
-	}
-	/*
-	 * Stage 2: Attempt to find the inode.
-	 */
-	sb = get_super(fh->fh_dev);
-	if (NULL == sb) {
-		printk("find_fh_dentry: No SuperBlock for device %s.",
-		       kdevname(fh->fh_dev));
-		dentry = NULL;
-		goto out;
-	}
-
-	dirino = u32_to_ino_t(fh->fh_dirino);
-	inode = iget_in_use(sb, fh->fh_ino);
-	if (!inode) {
-		dprintk("find_fh_dentry: No inode found.\n");
-		goto out_five;
-	}
-	goto check;
-recheck:
-	if (!inode) {
+	struct dentry *dentry, *result = NULL;
+	struct qstr qs;
+	char namebuf[256];
+	int  found =0;
+	int err;	/* signed -errno: a u32 would not survive ERR_PTR() on 64-bit */
+
+	qs.name = namebuf;
+	/*
+	 * Attempt to find the inode.
+	 */
+	result = nfsd_iget(sb, fh->fh_ino, fh->fh_generation);
+	err = PTR_ERR(result);
+	if (IS_ERR(result))
+		goto err_out;
+	err = -ESTALE;
+	if (!result) {
 		dprintk("find_fh_dentry: No inode found.\n");
-		goto out_three;
+		goto err_out;
 	}
-check:
-	for (lst = inode->i_dentry.next;
-	     lst != &inode->i_dentry;
-	     lst = lst->next) {
-		dentry = list_entry(lst, struct dentry, d_alias);
-
-/* if we are looking up a directory then we don't need the parent! */
-		if (!dentry ||
-		    !dentry->d_parent ||
-		    !dentry->d_parent->d_inode) {
-printk("find_fh_dentry: Found a useless inode %lu\n", inode->i_ino);
-			continue;
-		}
-		if (dentry->d_parent->d_inode->i_ino != dirino)
-			continue;
+	if (!IS_ROOT(result) || result->d_inode->i_sb->s_root ==result)
+		return result;
 
-		dget(dentry);
-		iput(inode);
-#ifdef NFSD_DEBUG_VERBOSE
-		printk("find_fh_dentry: Found%s %s/%s filehandle dirino = %lu, %lu\n",
-		       retry ? " Renamed" : "",
-		       dentry->d_parent->d_name.name,
-		       dentry->d_name.name,
-		       dentry->d_parent->d_inode->i_ino,
-		       dirino);
-#endif
-		goto out;
-	} /* for inode->i_dentry */
+	/* result is now a "root" dentry, which may be adequate as it stands, or else
+	 * will get spliced into the dcache tree */
 
-	/*
-	 * Before proceeding to a lookup, check for a rename
-	 */
-	if (!retry && (dirino = nfsd_cached_lookup(fh))) {
-		dprintk("find_fh_dentry: retry with %lu\n", dirino);
-		retry = 1;
-		goto recheck;
+	if (!S_ISDIR(result->d_inode->i_mode) && ! needpath) {
+		return result;
 	}
-
-	iput(inode);
-
-	dprintk("find_fh_dentry: dirino not found %lu\n", dirino);
-
-out_three:
-
-	/*
-	 * Stage 3: Look up the dentry based on the inode and parent inode
-	 * numbers. This should work for all Unix-like filesystems.
+	/* It's a directory, or we are required to confirm the file's
+	 * location in the tree.
 	 */
-	looked_up = 1;
-	dentry = lookup_inode(u32_to_kdev_t(fh->fh_dev),
-			      u32_to_ino_t(fh->fh_dirino),
-			      u32_to_ino_t(fh->fh_ino));
-	if (!IS_ERR(dentry)) {
-		struct inode * inode = dentry->d_inode;
-#ifdef NFSD_DEBUG_VERBOSE
-printk("find_fh_dentry: looked up %s/%s\n",
-       dentry->d_parent->d_name.name, dentry->d_name.name);
-#endif
-		if (inode && inode->i_ino == u32_to_ino_t(fh->fh_ino)) {
-			nfsdstats.fh_lookup++;
-			goto out;
+	dprintk("nfs_fh: need to look harder for %d/%d\n",sb->s_dev,fh->fh_ino);
+	found = 0;
+	if (!S_ISDIR(result->d_inode->i_mode)) {
+		if (fh->fh_dirino == 0)
+			goto err_result; /* don't know how to find parent */
+		else {
+			/* need to iget fh->fh_dirino and make sure this inode is in that directory */
+			dentry = nfsd_iget(sb, fh->fh_dirino, 0);
+			err = PTR_ERR(dentry);
+			if (IS_ERR(dentry))
+				goto err_result;
+			err = -ESTALE;
+			if (!dentry->d_inode
+			    || !S_ISDIR(dentry->d_inode->i_mode)) {
+				goto err_dentry;
+			}
+			if (!IS_ROOT(dentry) || dentry->d_inode->i_sb->s_root ==dentry)
+				found = 1;
+			err = get_ino_name(dentry, &qs, result->d_inode->i_ino);
+			if (err)
+				goto err_dentry;
+
+			/* OK, we have the name in parent of inode,  lets fill in the dentry */
+			err = d_splice(result, dentry, &qs);
+			if (err)
+				goto err_dentry;
+		}
+	}
+	else
+		dentry = dget(result);
+
+	while(!found) {
+		/* LOOP INVARIANT */
+		/* haven't found a place in the tree yet, but we do have a path
+		 * from dentry down to result, and dentry is a directory.
+		 * Have a hold on dentry and result */
+		struct dentry *pdentry;
+		struct inode *parent;
+
+		pdentry = nfsd_findparent(dentry);
+		err = PTR_ERR(pdentry);
+		if (IS_ERR(pdentry))
+			goto err_dentry;
+		parent = pdentry->d_inode;
+		err = -EACCES;
+		if (!parent) {
+			dput(pdentry);
+			goto err_dentry;
 		}
-#ifdef NFSD_PARANOIA
-printk("find_fh_dentry: %s/%s lookup mismatch!\n",
-       dentry->d_parent->d_name.name, dentry->d_name.name);
-#endif
-		dput(dentry);
-	}
-
-	/*
-	 * Stage 4: Look for the parent dentry in the fhcache ...
-	 */
-	parent = find_dentry_by_ino(u32_to_kdev_t(fh->fh_dev),
-				    u32_to_ino_t(fh->fh_dirino));
-	if (parent) {
-		/*
-		 * ... then search for the inode in the parent directory.
+		/* I'm not sure that this is the best test for
+		 *  "is it not a floating dentry?"
 		 */
-		dget(parent);
-		dentry = lookup_by_inode(parent, u32_to_ino_t(fh->fh_ino));
-		dput(parent);
-		if (dentry)
-			goto out;
-	}
+		if (!IS_ROOT(pdentry) || parent->i_sb->s_root == pdentry)
+			found = 1;
 
-out_five:
+		err = get_ino_name(pdentry, &qs, dentry->d_inode->i_ino);
+		if (err) {
+			dput(pdentry);
+			goto err_dentry;
+		}
+		err = d_splice(dentry, pdentry, &qs);
+		dprintk("nfsd_fh: found name %s for ino %ld\n", dentry->d_name.name, dentry->d_inode->i_ino);
+		dput(dentry);
+		dentry = pdentry;
+	}
+	dput(dentry);
+	return result;
 
-	/*
-	 * Stage 5: Search the whole volume, Yea Right.
-	 */
-#ifdef NFSD_PARANOIA_EXTREME
-printk("find_fh_dentry: %s/%u dir/%u not found!\n",
-       kdevname(u32_to_kdev_t(fh->fh_dev)), fh->fh_ino, fh->fh_dirino);
-#endif
-	dentry = NULL;
-	nfsdstats.fh_stale++;
-	
-out:
-	expire_all();
-	return dentry;
+err_dentry:
+	dput(dentry);
+err_result:
+	dput(result);
+err_out:
+	if (err == -ESTALE)
+		nfsdstats.fh_stale++;
+	return ERR_PTR(err);
 }
 
 /*
@@ -1108,6 +367,9 @@
  *
  * Note that the file handle dentry may need to be freed even after
  * an error return.
+ *
+ * This is only called at the start of an nfsproc call, so fhp points to
+ * a svc_fh which is all 0 except for the over-the-wire file handle.
  */
 u32
 fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
@@ -1118,64 +380,79 @@
 	struct inode	*inode;
 	u32		error = 0;
 
-	dprintk("nfsd: fh_verify(exp %s/%u file (%s/%u dir %u)\n",
-		kdevname(fh->fh_xdev),
+	dprintk("nfsd: fh_verify: export (%s/%u) file (%s/%u dir %u) (0x%x, 0x%x)\n",
+		bdevname(fh->fh_xdev),
 		fh->fh_xino,
-		kdevname(fh->fh_dev),
+		bdevname(fh->fh_dev),
 		fh->fh_ino,
-		fh->fh_dirino);
+		fh->fh_dirino,
+		type, access);
 
-	if (fhp->fh_dverified)
-		goto check_type;
-	/*
-	 * Look up the export entry.
-	 */
-	error = nfserr_stale;
-	exp = exp_get(rqstp->rq_client,
-		      u32_to_kdev_t(fh->fh_xdev),
-		      u32_to_ino_t(fh->fh_xino));
-	if (!exp) {
-		/* export entry revoked */
-		nfsdstats.fh_stale++;
-		goto out;
-	}
+	if (!fhp->fh_dverified) {
+		/*
+		 * Security: Check that the fh is internally consistent (from <gam3@acm.org>)
+		 */
+		if (fh->fh_dev != fh->fh_xdev) {
+			printk("fh_verify: Security: export on other device (%s, %s).\n",
+			       bdevname(fh->fh_dev), bdevname(fh->fh_xdev));
+			error = nfserr_stale;
+			nfsdstats.fh_stale++;
+			goto out;
+		}
 
-	/* Check if the request originated from a secure port. */
-	error = nfserr_perm;
-	if (!rqstp->rq_secure && EX_SECURE(exp)) {
-		printk(KERN_WARNING
-			"nfsd: request from insecure port (%08x:%d)!\n",
-				ntohl(rqstp->rq_addr.sin_addr.s_addr),
-				ntohs(rqstp->rq_addr.sin_port));
-		goto out;
-	}
+		/*
+		 * Look up the export entry.
+		 */
+		error = nfserr_stale;
+		exp = exp_get(rqstp->rq_client,
+			      u32_to_kdev_t(fh->fh_xdev),
+			      u32_to_ino_t(fh->fh_xino));
+		if (!exp) {
+			/* export entry revoked */
+			nfsdstats.fh_stale++;
+			goto out;
+		}
 
-	/* Set user creds if we haven't done so already. */
-	nfsd_setuser(rqstp, exp);
+		/* Check if the request originated from a secure port. */
+		error = nfserr_perm;
+		if (!rqstp->rq_secure && EX_SECURE(exp)) {
+			printk(KERN_WARNING
+			       "nfsd: request from insecure port (%08lx:%d)!\n",
+			       (long)ntohl(rqstp->rq_addr.sin_addr.s_addr),
+			       ntohs(rqstp->rq_addr.sin_port));
+			goto out;
+		}
 
-	/*
-	 * Look up the dentry using the NFS file handle.
-	 */
-	error = nfserr_noent;
-	dentry = find_fh_dentry(fh);
-	if (!dentry) {
-		goto out;
-	}
-	if (IS_ERR(dentry)) {
-		error = nfserrno(-PTR_ERR(dentry));
-		goto out;
+		/* Set user creds if we haven't done so already. */
+		nfsd_setuser(rqstp, exp);
+
+		/*
+		 * Look up the dentry using the NFS file handle.
+		 */
+
+		dentry = find_fh_dentry(exp->ex_dentry->d_inode->i_sb,
+					fh,
+					!(exp->ex_flags & NFSEXP_NOSUBTREECHECK));
+
+		if (IS_ERR(dentry)) {
+			error = nfserrno(-PTR_ERR(dentry));
+			goto out;
+		}
+
+		fhp->fh_dentry = dentry;
+		fhp->fh_export = exp;
+		fhp->fh_dverified = 1;
+		nfsd_nr_verified++;
+	} else {
+		/* just rechecking permissions
+		 * (e.g. nfsproc_create calls fh_verify, then nfsd_create does as well)
+		 */
+		dprintk("nfsd: fh_verify - just checking\n");
+		dentry = fhp->fh_dentry;
+		exp = fhp->fh_export;
 	}
 
-	/*
-	 * Note:  it's possible the returned dentry won't be the one in the
-	 * file handle.  We can correct the file handle for our use, but
-	 * unfortunately the client will keep sending the broken one.  Let's
-	 * hope the lookup will keep patching things up.
-	 */
-	fhp->fh_dentry = dentry;
-	fhp->fh_export = exp;
-	fhp->fh_dverified = 1;
-	nfsd_nr_verified++;
+	inode = dentry->d_inode;
 
 	/* Type check. The correct error return for type mismatches
 	 * does not seem to be generally agreed upon. SunOS seems to
@@ -1183,39 +460,8 @@
 	 * spec says this is incorrect (implementation notes for the
 	 * write call).
 	 */
-check_type:
-	dentry = fhp->fh_dentry;
-	inode = dentry->d_inode;
-	error = nfserr_stale;
-	/* On a heavily loaded SMP machine, more than one identical
-	   requests may run at the same time on different processors.
-	   One thread may get here with unfinished fh after another
-	   thread just fetched the inode. It doesn't make any senses
-	   to check fh->fh_generation here since it has not been set
-	   yet. In that case, we shouldn't send back the stale
-	   filehandle to the client. We use fh->fh_dcookie to indicate
-	   if fh->fh_generation is set or not. If fh->fh_dcookie is
-	   not set, don't return stale filehandle. */
-	if (inode->i_generation != fh->fh_generation) {
-		if (fh->fh_dcookie) {
-			dprintk("fh_verify: Bad version %lu %u %u: 0x%x, 0x%x\n",
-				inode->i_ino,
-				inode->i_generation,
-				fh->fh_generation,
-				type, access);
-			nfsdstats.fh_stale++;
-			goto out;
-		}
-		else {
-			/* We get here when inode is fetched by other
-			   threads. We just use what is in there. */
-			fh->fh_ino = ino_t_to_u32(inode->i_ino);
-			fh->fh_generation = inode->i_generation;
-			fh->fh_dcookie = (struct dentry *)0xfeebbaca;
-			nfsdstats.fh_concurrent++;
-		}
-	}
-	exp = fhp->fh_export;
+
+	/* When is type ever negative? */
 	if (type > 0 && (inode->i_mode & S_IFMT) != type) {
 		error = (type == S_IFDIR)? nfserr_notdir : nfserr_isdir;
 		goto out;
@@ -1229,38 +475,37 @@
 	 * Security: Check that the export is valid for dentry <gam3@acm.org>
 	 */
 	error = 0;
-	if (fh->fh_dev != fh->fh_xdev) {
-		printk("fh_verify: Security: export on other device (%s, %s).\n",
-		       kdevname(fh->fh_dev), kdevname(fh->fh_xdev));
-		error = nfserr_stale;
-		nfsdstats.fh_stale++;
-	} else if (exp->ex_dentry != dentry) {
-		struct dentry *tdentry = dentry;
 
-		do {
-			tdentry = tdentry->d_parent;
-			if (exp->ex_dentry == tdentry)
-				break;
-			/* executable only by root and we can't be root */
-			if (current->fsuid
-			    && !(tdentry->d_inode->i_uid
-			         && (tdentry->d_inode->i_mode & S_IXUSR))
-			    && !(tdentry->d_inode->i_gid
-				 && (tdentry->d_inode->i_mode & S_IXGRP))
-			    && !(tdentry->d_inode->i_mode & S_IXOTH)
-			    && (exp->ex_flags & NFSEXP_ROOTSQUASH)) {
+	if (!(exp->ex_flags & NFSEXP_NOSUBTREECHECK)) {
+		if (exp->ex_dentry != dentry) {
+			struct dentry *tdentry = dentry;
+
+			do {
+				tdentry = tdentry->d_parent;
+				if (exp->ex_dentry == tdentry)
+					break;
+				/* executable only by root and we can't be root */
+				if (current->fsuid
+				    && (exp->ex_flags & NFSEXP_ROOTSQUASH)
+				    && !(tdentry->d_inode->i_uid
+					 && (tdentry->d_inode->i_mode & S_IXUSR))
+				    && !(tdentry->d_inode->i_gid
+					 && (tdentry->d_inode->i_mode & S_IXGRP))
+				    && !(tdentry->d_inode->i_mode & S_IXOTH)
+					) {
+					error = nfserr_stale;
+					nfsdstats.fh_stale++;
+					dprintk("fh_verify: no root_squashed access.\n");
+				}
+			} while ((tdentry != tdentry->d_parent));
+			if (exp->ex_dentry != tdentry) {
 				error = nfserr_stale;
 				nfsdstats.fh_stale++;
-dprintk("fh_verify: no root_squashed access.\n");
+				printk("nfsd Security: %s/%s bad export.\n",
+				       dentry->d_parent->d_name.name,
+				       dentry->d_name.name);
+				goto out;
 			}
-		} while ((tdentry != tdentry->d_parent));
-		if (exp->ex_dentry != tdentry) {
-			error = nfserr_stale;
-			nfsdstats.fh_stale++;
-			printk("nfsd Security: %s/%s bad export.\n",
-			       dentry->d_parent->d_name.name,
-			       dentry->d_name.name);
-			goto out;
 		}
 	}
 
@@ -1269,9 +514,10 @@
 		error = nfsd_permission(exp, dentry, access);
 	}
 #ifdef NFSD_PARANOIA
-if (error)
-printk("fh_verify: %s/%s permission failure, acc=%x, error=%d\n",
-dentry->d_parent->d_name.name, dentry->d_name.name, access, (error >> 24));
+	if (error) {
+		printk("fh_verify: %s/%s permission failure, acc=%x, error=%d\n",
+		       dentry->d_parent->d_name.name, dentry->d_name.name, access, (error >> 24));
+	}
 #endif
 out:
 	return error;
@@ -1306,16 +552,17 @@
 	}
 	fh_init(fhp);
 
-	fhp->fh_handle.fh_dcookie = dentry;
+	fhp->fh_handle.fh_dirino = ino_t_to_u32(parent->d_inode->i_ino);
+	fhp->fh_handle.fh_dev    = kdev_t_to_u32(parent->d_inode->i_dev);
+	fhp->fh_handle.fh_xdev   = kdev_t_to_u32(exp->ex_dev);
+	fhp->fh_handle.fh_xino   = ino_t_to_u32(exp->ex_ino);
+	fhp->fh_handle.fh_dcookie = (struct dentry *)0xfeebbaca;
 	if (inode) {
 		fhp->fh_handle.fh_ino = ino_t_to_u32(inode->i_ino);
 		fhp->fh_handle.fh_generation = inode->i_generation;
-		fhp->fh_handle.fh_dcookie = (struct dentry *)0xfeebbaca;
+		if (S_ISDIR(inode->i_mode) || (exp->ex_flags & NFSEXP_NOSUBTREECHECK))
+			fhp->fh_handle.fh_dirino = 0;
 	}
-	fhp->fh_handle.fh_dirino = ino_t_to_u32(parent->d_inode->i_ino);
-	fhp->fh_handle.fh_dev	 = kdev_t_to_u32(parent->d_inode->i_dev);
-	fhp->fh_handle.fh_xdev	 = kdev_t_to_u32(exp->ex_dev);
-	fhp->fh_handle.fh_xino	 = ino_t_to_u32(exp->ex_ino);
 
 	fhp->fh_dentry = dentry; /* our internal copy */
 	fhp->fh_export = exp;
@@ -1327,6 +574,7 @@
 
 /*
  * Update file handle information after changing a dentry.
+ * This is only called by nfsd_create
  */
 void
 fh_update(struct svc_fh *fhp)
@@ -1343,7 +591,9 @@
 		goto out_negative;
 	fhp->fh_handle.fh_ino = ino_t_to_u32(inode->i_ino);
 	fhp->fh_handle.fh_generation = inode->i_generation;
-	fhp->fh_handle.fh_dcookie = (struct dentry *)0xfeebbaca;
+	if (S_ISDIR(inode->i_mode) || (fhp->fh_export->ex_flags & NFSEXP_NOSUBTREECHECK))
+		fhp->fh_handle.fh_dirino = 0;
+
 out:
 	return;
 
@@ -1357,8 +607,7 @@
 }
 
 /*
- * Release a file handle.  If the file handle carries a dentry count,
- * we add the dentry to the short-term cache rather than release it.
+ * Release a file handle.
  */
 void
 fh_put(struct svc_fh *fhp)
@@ -1369,10 +618,8 @@
 		fhp->fh_dverified = 0;
 		if (!dentry->d_count)
 			goto out_bad;
-		if (!dentry->d_inode || !add_to_fhcache(dentry, 0)) {
-			dput(dentry);
-			nfsd_nr_put++;
-		}
+		dput(dentry);
+		nfsd_nr_put++;
 	}
 	return;
 
@@ -1382,116 +629,3 @@
 	return;
 }
 
-/*
- * Flush any cached dentries for the specified device
- * or for all devices.
- *
- * This is called when revoking the last export for a
- * device, so that it can be unmounted cleanly.
- */
-void nfsd_fh_flush(kdev_t dev)
-{
-	struct fh_entry *fhe;
-	int i, pass = 2;
-
-	fhe = &filetable[0];
-	while (pass--) {
-		for (i = 0; i < NFSD_MAXFH; i++, fhe++) {
-			struct dentry *dentry = fhe->dentry;
-			if (!dentry)
-				continue;
-			if (dev && dentry->d_inode->i_dev != dev)
-				continue;
-			fhe->dentry = NULL;
-			dput(dentry);
-			nfsd_nr_put++;
-		}
-		fhe = &dirstable[0];
-	}
-}
-
-/*
- * Free the rename and path caches.
- */
-void nfsd_fh_free(void)
-{
-	struct list_head *tmp;
-	int i;
-
-	/* Flush dentries for all devices */
-	nfsd_fh_flush(0);
-
-	/*
-	 * N.B. write a destructor for these lists ...
-	 */
-	i = 0;
-	while ((tmp = fixup_head.next) != &fixup_head) {
-		struct nfsd_fixup *fp;
-		fp = list_entry(tmp, struct nfsd_fixup, lru);
-		free_fixup_entry(fp);
-		i++;
-	}
-	printk(KERN_DEBUG "nfsd_fh_free: %d fixups freed\n", i);
-
-	i = 0;
-	while ((tmp = path_inuse.next) != &path_inuse) {
-		struct nfsd_path *pe;
-		pe = list_entry(tmp, struct nfsd_path, lru);
-		free_path_entry(pe);
-		i++;
-	}
-	printk(KERN_DEBUG "nfsd_fh_free: %d paths freed\n", i);
-
-	printk(KERN_DEBUG "nfsd_fh_free: verified %d, put %d\n",
-		nfsd_nr_verified, nfsd_nr_put);
-}
-
-void nfsd_fh_init(void)
-{
-	extern void __my_nfsfh_is_too_big(void); 
-
-	if (filetable)
-		return;
-
-	/* Sanity check */ 
-	if (sizeof(struct nfs_fhbase) > 32) 
-		__my_nfsfh_is_too_big(); 
-
-	filetable = kmalloc(sizeof(struct fh_entry) * NFSD_MAXFH,
-			    GFP_KERNEL);
-	dirstable = kmalloc(sizeof(struct fh_entry) * NFSD_MAXFH,
-			    GFP_KERNEL);
-
-	if (filetable == NULL || dirstable == NULL) {
-		printk(KERN_WARNING "nfsd_fh_init : Could not allocate fhcache\n");
-		nfsd_nservers = 0;
-		return;
-	}
-
-	memset(filetable, 0, NFSD_MAXFH*sizeof(struct fh_entry));
-	memset(dirstable, 0, NFSD_MAXFH*sizeof(struct fh_entry));
-	INIT_LIST_HEAD(&path_inuse);
-	INIT_LIST_HEAD(&fixup_head);
-
-	printk(KERN_DEBUG 
-		"nfsd_fh_init : initialized fhcache, entries=%lu\n", NFSD_MAXFH);
-	/*
-	 * Display a warning if the ino_t is larger than 32 bits.
-	 */
-	if (sizeof(ino_t) > sizeof(__u32))
-		printk(KERN_INFO 
-			"NFSD: ino_t is %d bytes, using lower 4 bytes\n",
-			sizeof(ino_t));
-}
-
-void
-nfsd_fh_shutdown(void)
-{
-	if (!filetable)
-		return;
-	printk(KERN_DEBUG 
-		"nfsd_fh_shutdown : freeing %ld fhcache entries.\n", NFSD_MAXFH);
-	kfree(filetable);
-	kfree(dirstable);
-	filetable = dirstable = NULL;
-}
--- linux/fs/nfsd/nfssvc.c.nfsattack-gafton	Tue Jan  4 13:12:23 2000
+++ linux/fs/nfsd/nfssvc.c	Fri Feb  4 23:37:24 2000
@@ -64,11 +64,9 @@
 	nfsd_nservers = nrservs;
 
 	error = -ENOMEM;
-	nfsd_fh_init();		/* NFS dentry cache */
 	if (nfsd_nservers == 0)
 		goto out;
 	  
-	error = -ENOMEM;
 	nfsd_racache_init();     /* Readahead param cache */
 	if (nfsd_nservers == 0)
 		goto out;
@@ -106,7 +104,7 @@
 nfsd(struct svc_rqst *rqstp)
 {
 	struct svc_serv	*serv = rqstp->rq_server;
-	int		oldumask, err, first = 0;
+	int		oldumask, err;
 
 	/* Lock module and set up kernel thread */
 	MOD_INC_USE_COUNT;
@@ -122,7 +120,6 @@
 	current->fs->umask = 0;
 	if (!nfsd_active++) {
 		nfssvc_boot = xtime;		/* record boot time */
-		first = 1;
 	}
 #if 0
 	lockd_up();				/* start lockd */
@@ -143,13 +140,8 @@
 		 * recvfrom routine.
 		 */
 		while ((err = svc_recv(serv, rqstp,
-		        first?5*HZ:MAX_SCHEDULE_TIMEOUT)) == -EAGAIN) {
-			if (first && 1) {
-				exp_readlock();
-				expire_all();
-				exp_unlock();
-			}
-		}
+				       MAX_SCHEDULE_TIMEOUT)) == -EAGAIN)
+		    ;
 		if (err < 0)
 			break;
 
@@ -192,8 +184,9 @@
 		printk("nfsd: last server exiting\n");
 		/* revoke all exports */
 		nfsd_export_shutdown();
-		/* release fhcache */
-		nfsd_fh_shutdown ();
+		/* release fhcache
+		 *  - deprecated --gaftonc */
+		/* nfsd_fh_shutdown (); */
 		/* release read-ahead cache */
 	        nfsd_racache_shutdown();
 	}
--- linux/fs/nfsd/vfs.c.nfsattack-gafton	Tue Jan  4 13:12:23 2000
+++ linux/fs/nfsd/vfs.c	Fri Feb  4 23:26:34 2000
@@ -142,6 +142,14 @@
 /*
  * Look up one component of a pathname.
  * N.B. After this call _both_ fhp and resfh need an fh_put
+ *
+ * If the lookup would cross a mountpoint, and the mounted filesystem
+ * is exported to the client with NFSEXP_CROSSMNT, then the lookup is
+ * accepted as it stands and the mounted directory is
+ * returned. Otherwise the covered directory is returned.
+ * NOTE: this mountpoint crossing is not supported properly by all
+ *   clients and is explicitly disallowed for NFSv3
+ *      NeilBrown <neilb@cse.unsw.edu.au>
  */
 int
 nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
@@ -181,18 +189,27 @@
 	 * check if we have crossed a mount point ...
 	 */
 	if (dchild->d_sb != dparent->d_sb) {
-		struct dentry *tdentry;
-		tdentry = dchild->d_covers;
-		if (tdentry == dchild)
-			goto out_dput;
-	        dput(dchild);
-		dchild = dget(tdentry);
-	        if (dchild->d_sb != dparent->d_sb) {
-printk("nfsd_lookup: %s/%s crossed mount point!\n", dparent->d_name.name, dchild->d_name.name);
-			goto out_dput;
+		struct svc_export *exp2 = NULL;
+		exp2 = exp_get(rqstp->rq_client,
+			       dchild->d_inode->i_dev,
+			       dchild->d_inode->i_ino);
+		if (exp2 && EX_CROSSMNT(exp2))
+			/* successfully crossed mount point */
+			exp = exp2;
+		else if (dchild->d_covers->d_sb == dparent->d_sb) {
+			/* stay in the original filesystem */
+			struct dentry *tdentry = dget(dchild->d_covers);
+			dput(dchild);
+			dchild = tdentry;
+		} else {
+			/* This cannot possibly happen */
+			printk("nfsd_lookup: %s/%s impossible mount point!\n", dparent->d_name.name, dchild->d_name.name);
+			dput(dchild);
+			err = nfserr_acces;
+			goto out;
+
 		}
 	}
-
 	/*
 	 * Note: we compose the file handle now, but as the
 	 * dentry may be negative, it may need to be updated.
@@ -207,10 +224,6 @@
 out_nfserr:
 	err = nfserrno(-PTR_ERR(dchild));
 	goto out;
-out_dput:
-	dput(dchild);
-	err = nfserr_acces;
-	goto out;
 }
 
 /*
@@ -226,6 +239,7 @@
 	int		ftype = 0;
 	int		imode;
 	int		err;
+	kernel_cap_t	saved_cap = 0;
 
 	if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
 		accmode |= MAY_WRITE;
@@ -234,7 +248,7 @@
 
 	/* Get inode */
 	err = fh_verify(rqstp, fhp, ftype, accmode);
-	if (err)
+	if (err || !iap->ia_valid)
 		goto out;
 
 	dentry = fhp->fh_dentry;
@@ -266,7 +280,7 @@
 
 current_time_ok:
 
-	/* The size case is special... */
+	/* The size case is special. It changes the file as well as the attributes.  */
 	if (iap->ia_valid & ATTR_SIZE) {
 if (!S_ISREG(inode->i_mode))
 printk("nfsd_setattr: size change??\n");
@@ -275,22 +289,17 @@
 			if (err)
 				goto out;
 		}
-		DQUOT_INIT(inode);
 		err = get_write_access(inode);
-		if (err) {
-			DQUOT_DROP(inode);
+		if (err)
 			goto out_nfserr;
-		}
-		/* N.B. Should we update the inode cache here? */
-		inode->i_size = iap->ia_size;
-		if (inode->i_op && inode->i_op->truncate)
-			inode->i_op->truncate(inode);
-		mark_inode_dirty(inode);
-		put_write_access(inode);
-		DQUOT_DROP(inode);
-		iap->ia_valid &= ~ATTR_SIZE;
-		iap->ia_valid |= ATTR_MTIME;
-		iap->ia_mtime = CURRENT_TIME;
+
+		err = locks_verify_area(FLOCK_VERIFY_WRITE, inode, NULL,
+				  iap->ia_size<inode->i_size ? iap->ia_size : inode->i_size,
+				  abs(inode->i_size - iap->ia_size));
+
+		if (err)
+			goto out_nfserr;
+		DQUOT_INIT(inode);
 	}
 
 	imode = inode->i_mode;
@@ -312,23 +321,32 @@
 	}
 
 	/* Change the attributes. */
-	if (iap->ia_valid) {
-		kernel_cap_t	saved_cap = 0;
 
-		iap->ia_valid |= ATTR_CTIME;
-		iap->ia_ctime = CURRENT_TIME;
-		if (current->fsuid != 0) {
-			saved_cap = current->cap_effective;
-			cap_clear(current->cap_effective);
-		}
+
+	iap->ia_valid |= ATTR_CTIME;
+	if (current->fsuid != 0) {
+		saved_cap = current->cap_effective;
+		cap_clear(current->cap_effective);
+	}
+	if (iap->ia_valid & ATTR_SIZE) {
+		fh_lock(fhp);
 		err = notify_change(dentry, iap);
-		if (current->fsuid != 0)
-			current->cap_effective = saved_cap;
-		if (err)
-			goto out_nfserr;
-		if (EX_ISSYNC(fhp->fh_export))
-			write_inode_now(inode);
+		if (!err) {
+			vmtruncate(inode,iap->ia_size);		
+			if (inode->i_op && inode->i_op->truncate)
+				inode->i_op->truncate(inode);
+		}
+		fh_unlock(fhp);
+		put_write_access(inode);
 	}
+	else
+		err = notify_change(dentry, iap);
+	if (current->fsuid != 0)
+		current->cap_effective = saved_cap;
+	if (err)
+		goto out_nfserr;
+	if (EX_ISSYNC(fhp->fh_export))
+		write_inode_now(inode);
 	err = 0;
 out:
 	return err;
@@ -419,7 +437,6 @@
 		filp->f_op->release(inode, filp);
 	if (filp->f_mode & FMODE_WRITE) {
 		put_write_access(inode);
-		DQUOT_DROP(inode);
 	}
 }
 
@@ -751,7 +768,6 @@
 	 */
 	DQUOT_INIT(dirp);
 	err = opfunc(dirp, dchild, iap->ia_mode, rdev);
-	DQUOT_DROP(dirp);
 	if (err < 0)
 		goto out_nfserr;
 
@@ -806,6 +822,11 @@
 	err = get_write_access(inode);
 	if (err)
 		goto out_nfserr;
+	err = locks_verify_area(FLOCK_VERIFY_WRITE, inode, NULL,
+				  size<inode->i_size ? size : inode->i_size,
+				  abs(inode->i_size - size));
+	if (err)
+		goto out_nfserr;
 
 	/* Things look sane, lock and do it. */
 	fh_lock(fhp);
@@ -817,15 +838,14 @@
 		cap_clear(current->cap_effective);
 	}
 	err = notify_change(dentry, &newattrs);
-	if (current->fsuid != 0)
-		current->cap_effective = saved_cap;
 	if (!err) {
 		vmtruncate(inode, size);
 		if (inode->i_op && inode->i_op->truncate)
 			inode->i_op->truncate(inode);
 	}
+	if (current->fsuid != 0)
+		current->cap_effective = saved_cap;
 	put_write_access(inode);
-	DQUOT_DROP(inode);
 	fh_unlock(fhp);
 out_nfserr:
 	if (err)
@@ -922,7 +942,6 @@
 	if (!dnew->d_inode) {
 		DQUOT_INIT(dirp);
 		err = dirp->i_op->symlink(dirp, dnew, path);
-		DQUOT_DROP(dirp);
 		if (!err) {
 			if (EX_ISSYNC(fhp->fh_export))
 				write_inode_now(dirp);
@@ -1002,7 +1021,6 @@
 
 	DQUOT_INIT(dirp);
 	err = dirp->i_op->link(dold, dirp, dnew);
-	DQUOT_DROP(dirp);
 	if (!err) {
 		if (EX_ISSYNC(ffhp->fh_export)) {
 			write_inode_now(dirp);
@@ -1123,19 +1141,9 @@
 		}
 	} else
 		dprintk("nfsd: Caught race in nfsd_rename");
-	DQUOT_DROP(fdir);
-	DQUOT_DROP(tdir);
 
 	nfsd_double_up(&tdir->i_sem, &fdir->i_sem);
 
-	if (!err && odentry->d_inode) {
-		add_to_rename_cache(tdir->i_ino,
-				    odentry->d_inode->i_dev,
-				    fdir->i_ino,
-				    odentry->d_inode->i_ino);
-	} else {
-		printk(": no inode in rename or err: %d.\n", err);
-	}
 	dput(ndentry);
 
 out_dput_old:
@@ -1183,7 +1191,6 @@
 		goto out;
 	}
 
-	expire_by_dentry(rdentry);
 
 	if (type != S_IFDIR) {
 		/* It's UNLINK */
@@ -1194,11 +1201,9 @@
 
 		err = vfs_unlink(dirp, rdentry);
 
-		DQUOT_DROP(dirp);
 		fh_unlock(fhp);
 
 		dput(rdentry);
-		expire_by_dentry(rdentry);
 	} else {
 		/* It's RMDIR */
 		/* See comments in fs/namei.c:do_rmdir */
@@ -1214,7 +1219,6 @@
 			err = vfs_rmdir(dirp, rdentry);
 
 		rdentry->d_count--;
-		DQUOT_DROP(dirp);
 		if (!fhp->fh_post_version)
 			fhp->fh_post_version = dirp->i_version;
 		fhp->fh_locked = 0;
--- linux/fs/nfsd/nfsctl.c.nfsattack-gafton	Fri Feb  4 23:36:11 2000
+++ linux/fs/nfsd/nfsctl.c	Fri Feb  4 23:36:52 2000
@@ -363,7 +363,8 @@
 	do_nfsservctl = NULL;
 	nfsd_export_shutdown();
 	nfsd_cache_shutdown();
-	nfsd_fh_free();
+	/* deprecated --gaftonc */
+	/* nfsd_fh_free(); */
 	remove_proc_entry("fs/nfs/time-diff-margin", NULL);
 	remove_proc_entry("fs/nfs/exports", NULL);
 	remove_proc_entry("fs/nfs", NULL);
--- linux/fs/Config.in.nfsattack-gafton	Fri Feb  4 23:26:28 2000
+++ linux/fs/Config.in	Fri Feb  4 23:26:34 2000
@@ -90,6 +90,9 @@
       define_bool CONFIG_LOCKD n
     fi
   fi
+  if [ "$CONFIG_NFS_FS" != "n" -o "$CONFIG_NFSD" != "n" ]; then
+    bool '   NFS Version 3' CONFIG_NFS_V3
+  fi
   tristate 'SMB filesystem support (to mount WfW shares etc.)' CONFIG_SMB_FS
 fi
 if [ "$CONFIG_IPX" != "n" -o "$CONFIG_INET" != "n" ]; then
--- linux/fs/locks.c.nfsattack-gafton	Tue Jan  4 13:12:23 2000
+++ linux/fs/locks.c	Fri Feb  4 23:26:34 2000
@@ -1134,6 +1134,8 @@
 		new->fl_start = fl->fl_start;
 		new->fl_end = fl->fl_end;
 		new->fl_notify = fl->fl_notify;
+		new->fl_insert = fl->fl_insert;
+		new->fl_remove = fl->fl_remove;
 		new->fl_u = fl->fl_u;
 	}
 	return new;
@@ -1152,6 +1154,9 @@
 	fl->fl_next = *pos;	/* insert into file's list */
 	*pos = fl;
 
+	if (fl->fl_insert)
+		fl->fl_insert(fl);
+
 	return;
 }
 
@@ -1179,6 +1184,9 @@
 		prevfl->fl_nextlink = nextfl;
 	else
 		file_lock_table = nextfl;
+
+	if (thisfl->fl_remove)
+		thisfl->fl_remove(thisfl);
 	
 	locks_wake_up_blocks(thisfl, wait);
 	locks_free_lock(thisfl);
--- linux/fs/super.c.nfsattack-gafton	Tue Jan  4 13:12:23 2000
+++ linux/fs/super.c	Fri Feb  4 23:26:34 2000
@@ -304,8 +304,10 @@
 	{ NFS_MOUNT_SOFT, ",soft" },
 	{ NFS_MOUNT_INTR, ",intr" },
 	{ NFS_MOUNT_POSIX, ",posix" },
+	{ NFS_MOUNT_TCP, ",tcp" },
 	{ NFS_MOUNT_NOCTO, ",nocto" },
 	{ NFS_MOUNT_NOAC, ",noac" },
+	{ NFS_MOUNT_NONLM, ",nolock" },
 	{ 0, NULL }
 };
 
@@ -330,6 +332,8 @@
 		}
 		if (!strcmp("nfs", tmp->mnt_sb->s_type->name)) {
 			nfss = &tmp->mnt_sb->u.nfs_sb.s_server;
+			len += sprintf(buf+len, ",v%d", nfss->rpc_ops->version);
+
 			if (nfss->rsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
 				len += sprintf(buf+len, ",rsize=%d",
 					       nfss->rsize);
--- linux/kernel/ksyms.c.nfsattack-gafton	Tue Jan  4 13:12:25 2000
+++ linux/kernel/ksyms.c	Fri Feb  4 23:26:34 2000
@@ -193,7 +193,14 @@
 EXPORT_SYMBOL(vfs_unlink);
 EXPORT_SYMBOL(vfs_rename);
 EXPORT_SYMBOL(__pollwait);
+EXPORT_SYMBOL(locks_mandatory_area);
 EXPORT_SYMBOL(ROOT_DEV);
+
+EXPORT_SYMBOL(page_cache_size);
+EXPORT_SYMBOL(page_hash_table);
+EXPORT_SYMBOL(page_hash_mask);
+EXPORT_SYMBOL(page_hash_bits);
+EXPORT_SYMBOL(__wait_on_page);
 EXPORT_SYMBOL(inode_generation_count);
 
 #if !defined(CONFIG_NFSD) && defined(CONFIG_NFSD_MODULE)
--- linux/mm/filemap.c.nfsattack-gafton	Tue Jan  4 13:12:26 2000
+++ linux/mm/filemap.c	Fri Feb  4 23:26:34 2000
@@ -270,18 +270,6 @@
 	update_vm_cache_conditional(inode, pos, buf, count, 0);
 }
 
-
-static inline void add_to_page_cache(struct page * page,
-	struct inode * inode, unsigned long offset,
-	struct page **hash)
-{
-	atomic_inc(&page->count);
-	page->flags = (page->flags & ~((1 << PG_uptodate) | (1 << PG_error))) | (1 << PG_referenced);
-	page->offset = offset;
-	add_page_to_inode_queue(inode, page);
-	__add_page_to_hash_queue(page, hash);
-}
-
 /*
  * Try to read ahead in the file. "page_cache" is a potentially free page
  * that we could use for the cache (if it is 0 we can try to create one,
@@ -1516,8 +1504,6 @@
 
 	while (count) {
 		unsigned long bytes, pgpos, offset;
-		char * dest;
-
 		/*
 		 * Try to find the page in the cache. If it isn't there,
 		 * allocate a free page.
@@ -1545,23 +1531,13 @@
 
 		/* Get exclusive IO access to the page.. */
 		wait_on_page(page);
-		set_bit(PG_locked, &page->flags);
 
 		/*
 		 * Do the real work.. If the writer ends up delaying the write,
 		 * the writer needs to increment the page use counts until he
 		 * is done with the page.
 		 */
-		dest = (char *) page_address(page) + offset;
-		if (dest != buf) /* See comment in update_vm_cache_cond. */
-			bytes -= copy_from_user(dest, buf, bytes);
-		status = -EFAULT;
-		if (bytes)
-			status = inode->i_op->updatepage(file, page, offset, bytes, sync);
-
-		/* Mark it unlocked again and drop the page.. */
-		clear_bit(PG_locked, &page->flags);
-		wake_up(&page->wait);
+		status = inode->i_op->updatepage(file, page, buf, offset, bytes, sync);
 		page_cache_release(page);
 
 		if (status < 0)
--- linux/include/linux/nfsd/const.h.nfsattack-gafton	Tue May 11 13:36:12 1999
+++ linux/include/linux/nfsd/const.h	Fri Feb  4 23:26:34 2000
@@ -16,6 +16,10 @@
 #include <linux/fs.h>
 #include <linux/nfs.h>
 
+#define NFS_FHSIZE		32
+#define NFS_MAXPATHLEN		1024
+#define NFS_MAXNAMLEN		255
+
 /*
  * Maximum protocol version supported by knfsd
  */
--- linux/include/linux/nfsd/export.h.nfsattack-gafton	Tue May 11 13:36:17 1999
+++ linux/include/linux/nfsd/export.h	Fri Feb  4 23:26:34 2000
@@ -34,8 +34,9 @@
 #define NFSEXP_UIDMAP		0x0040
 #define NFSEXP_KERBEROS		0x0080		/* not available */
 #define NFSEXP_SUNSECURE	0x0100
-#define NFSEXP_CROSSMNT		0x0200		/* not available */
-#define NFSEXP_ALLFLAGS		0x03FF
+#define NFSEXP_CROSSMNT		0x0200
+#define NFSEXP_NOSUBTREECHECK	0x0400
+#define NFSEXP_ALLFLAGS		0x07FF
 
 
 #ifdef __KERNEL__
--- linux/include/linux/nfsd/nfsfh.h.nfsattack-gafton	Tue Jan  4 13:12:25 2000
+++ linux/include/linux/nfsd/nfsfh.h	Fri Feb  4 23:33:59 2000
@@ -105,13 +105,6 @@
 void	fh_compose(struct svc_fh *, struct svc_export *, struct dentry *);
 void	fh_update(struct svc_fh *);
 void	fh_put(struct svc_fh *);
-void	nfsd_fh_flush(kdev_t);
-void	nfsd_fh_init(void);
-void	nfsd_fh_shutdown(void);
-void	nfsd_fh_free(void);
-
-void	expire_all(void);
-void	expire_by_dentry(struct dentry *);
 
 static __inline__ struct svc_fh *
 fh_copy(struct svc_fh *dst, struct svc_fh *src)
--- linux/include/linux/lockd/lockd.h.nfsattack-gafton	Tue May 11 13:36:17 1999
+++ linux/include/linux/lockd/lockd.h	Fri Feb  4 23:26:34 2000
@@ -138,12 +138,14 @@
 					struct sockaddr_in *, int, int);
 struct rpc_clnt * nlm_bind_host(struct nlm_host *);
 void		  nlm_rebind_host(struct nlm_host *);
+struct nlm_host * nlm_get_host(struct nlm_host *);
 void		  nlm_release_host(struct nlm_host *);
 void		  nlm_shutdown_hosts(void);
 
 /*
  * Server-side lock handling
  */
+int		  nlmsvc_async_call(struct nlm_rqst *, u32, rpc_action);
 u32		  nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
 					struct nlm_lock *, int, struct nlm_cookie *);
 u32		  nlmsvc_unlock(struct nlm_file *, struct nlm_lock *);
--- linux/include/linux/lockd/nlm.h.nfsattack-gafton	Mon Apr  7 14:35:31 1997
+++ linux/include/linux/lockd/nlm.h	Fri Feb  4 23:26:34 2000
@@ -10,19 +10,21 @@
 #define LINUX_LOCKD_NLM_H
 
 /* Maximum file offset in file_lock.fl_end */
-#ifdef OFFSET_MAX
-# define NLM_OFFSET_MAX		OFFSET_MAX
-#else
 # define NLM_OFFSET_MAX		((off_t) 0x7fffffff)
-#endif
+# define NLM4_OFFSET_MAX	((s64) ((~(u64)0) >> 1))
 
 /* Return states for NLM */
 enum {
-	NLM_LCK_GRANTED = 0,
-	NLM_LCK_DENIED,
-	NLM_LCK_DENIED_NOLOCKS,
-	NLM_LCK_BLOCKED,
-	NLM_LCK_DENIED_GRACE_PERIOD,
+	NLM_LCK_GRANTED			= 0,
+	NLM_LCK_DENIED			= 1,
+	NLM_LCK_DENIED_NOLOCKS		= 2,
+	NLM_LCK_BLOCKED			= 3,
+	NLM_LCK_DENIED_GRACE_PERIOD	= 4,
+	NLM_DEADLCK			= 5,
+	NLM_ROFS			= 6,
+	NLM_STALE_FH			= 7,
+	NLM_FBIG			= 8,
+	NLM_FAILED			= 9,
 };
 
 #define NLM_PROGRAM		100021
--- linux/include/linux/lockd/xdr.h.nfsattack-gafton	Tue May 11 13:36:15 1999
+++ linux/include/linux/lockd/xdr.h	Fri Feb  4 23:26:34 2000
@@ -74,6 +74,7 @@
 #define NLMSVC_XDRSIZE		sizeof(struct nlm_args)
 
 void	nlmxdr_init(void);
+void	nlmxdr_shutdown(void);
 int	nlmsvc_decode_testargs(struct svc_rqst *, u32 *, struct nlm_args *);
 int	nlmsvc_encode_testres(struct svc_rqst *, u32 *, struct nlm_res *);
 int	nlmsvc_decode_lockargs(struct svc_rqst *, u32 *, struct nlm_args *);
--- linux/include/linux/sunrpc/auth.h.nfsattack-gafton	Tue May 11 13:35:45 1999
+++ linux/include/linux/sunrpc/auth.h	Fri Feb  4 23:26:34 2000
@@ -64,10 +64,10 @@
 	struct rpc_auth *	(*create)(struct rpc_clnt *);
 	void			(*destroy)(struct rpc_auth *);
 
-	struct rpc_cred *	(*crcreate)(struct rpc_task *);
+	struct rpc_cred *	(*crcreate)(int);
 	void			(*crdestroy)(struct rpc_cred *);
 
-	int			(*crmatch)(struct rpc_task *, struct rpc_cred*);
+	int			(*crmatch)(struct rpc_cred *, int);
 	u32 *			(*crmarshal)(struct rpc_task *, u32 *, int);
 	int			(*crrefresh)(struct rpc_task *);
 	u32 *			(*crvalidate)(struct rpc_task *, u32 *);
@@ -83,10 +83,14 @@
 int			rpcauth_unregister(struct rpc_authops *);
 struct rpc_auth *	rpcauth_create(unsigned int, struct rpc_clnt *);
 void			rpcauth_destroy(struct rpc_auth *);
-struct rpc_cred *	rpcauth_lookupcred(struct rpc_task *);
+struct rpc_cred *	rpcauth_lookupcred(struct rpc_auth *, int);
+struct rpc_cred *	rpcauth_bindcred(struct rpc_task *);
 void			rpcauth_holdcred(struct rpc_task *);
-void			rpcauth_releasecred(struct rpc_task *);
-int			rpcauth_matchcred(struct rpc_task *, struct rpc_cred *);
+void			rpcauth_releasecred(struct rpc_auth *,
+					    struct rpc_cred *);
+void			rpcauth_unbindcred(struct rpc_task *);
+int			rpcauth_matchcred(struct rpc_auth *,
+					  struct rpc_cred *, int);
 u32 *			rpcauth_marshcred(struct rpc_task *, u32 *);
 u32 *			rpcauth_checkverf(struct rpc_task *, u32 *);
 int			rpcauth_refreshcred(struct rpc_task *);
--- linux/include/linux/sunrpc/clnt.h.nfsattack-gafton	Tue May 11 13:36:15 1999
+++ linux/include/linux/sunrpc/clnt.h	Fri Feb  4 23:26:34 2000
@@ -80,6 +80,13 @@
 	struct rpc_procinfo *	procs;		/* procedure array */
 };
 
+struct rpc_message {
+	u32			proc;		/* Procedure number */
+	void			*arg;		/* Arguments */
+	void			*res;		/* Result */
+	struct rpc_cred		*cred;		/* Credentials */
+};
+
 /*
  * Procedure information
  */
@@ -111,21 +118,23 @@
 void		rpc_getport(struct rpc_task *, struct rpc_clnt *);
 int		rpc_register(u32, u32, int, unsigned short, int *);
 
-int		rpc_call(struct rpc_clnt *clnt, u32 proc,
-				void *argp, void *resp, int flags);
-int		rpc_call_async(struct rpc_task *task, u32 proc,
-				void *argp, void *resp, int flags);
-void		rpc_call_setup(struct rpc_task *task, u32 proc,
-				void *argp, void *resp, int flags);
-int		rpc_do_call(struct rpc_clnt *clnt, u32 proc,
-				void *argp, void *resp, int flags,
-				rpc_action callback, void *clntdata);
+void		rpc_call_setup(struct rpc_task *, struct rpc_message *, int);
+
+int		rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg,
+			       int flags, rpc_action callback, void *clntdata);
+int		rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg,
+			      int flags);
 void		rpc_restart_call(struct rpc_task *);
 void		rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset);
 void		rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset);
 
-#define rpc_call(clnt, proc, argp, resp, flags)	\
-		rpc_do_call(clnt, proc, argp, resp, flags, NULL, NULL)
+static __inline__	/* wrapper: packs its arguments into an rpc_message for rpc_call_sync() */
+int rpc_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
+{
+	struct rpc_message msg = { proc, argp, resp, NULL };	/* proc, arg, res; cred is left NULL */
+	return rpc_call_sync(clnt, &msg, flags);	/* rpc_call_sync()'s result is returned unchanged */
+}
+		
 
 extern __inline__ void
 rpc_set_timeout(struct rpc_clnt *clnt, unsigned int retr, unsigned long incr)
--- linux/include/linux/sunrpc/sched.h.nfsattack-gafton	Tue May 11 13:35:45 1999
+++ linux/include/linux/sunrpc/sched.h	Fri Feb  4 23:26:34 2000
@@ -11,6 +11,7 @@
 
 #include <linux/timer.h>
 #include <linux/tqueue.h>
+#include <asm/errno.h>
 #include <linux/sunrpc/types.h>
 
 /*
@@ -55,6 +56,7 @@
 	void			(*tk_callback)(struct rpc_task *);
 	void			(*tk_action)(struct rpc_task *);
 	void			(*tk_exit)(struct rpc_task *);
+	void			(*tk_release)(struct rpc_task *);
 	void *			tk_calldata;
 
 	/*
@@ -66,6 +68,10 @@
 	struct wait_queue *	tk_wait;	/* sync: sleep on this q */
 	unsigned long		tk_timeout;	/* timeout for rpc_sleep() */
 	unsigned short		tk_flags;	/* misc flags */
+	unsigned short		tk_lock;	/* Task lock counter */
+	unsigned int		tk_wakeup   : 1,/* Task waiting to wake up */
+				tk_sleeping : 1,/* Task is truly asleep */
+				tk_active   : 1;/* Task has been activated */
 #ifdef RPC_DEBUG
 	unsigned short		tk_pid;		/* debugging aid */
 #endif
@@ -89,6 +95,7 @@
 #define RPC_TASK_ROOTCREDS	0x0100		/* force root creds */
 #define RPC_TASK_DYNAMIC	0x0200		/* task was kmalloc'ed */
 #define RPC_TASK_KILLED		0x0400		/* task was killed */
+#define RPC_TASK_PRIORITY	0x0800		/* resched when waking up */
 #define RPC_TASK_NFSWRITE	0x1000		/* an NFS writeback */
 
 #define RPC_IS_RUNNING(t)	((t)->tk_flags & RPC_TASK_RUNNING)
@@ -99,6 +106,8 @@
 #define RPC_DO_CALLBACK(t)	((t)->tk_flags & RPC_TASK_CALLBACK)
 #define RPC_DO_ROOTOVERRIDE(t)	((t)->tk_flags & RPC_TASK_ROOTCREDS)
 #define RPC_ASSASSINATED(t)	((t)->tk_flags & RPC_TASK_KILLED)
+#define RPC_IS_SLEEPING(t)	((t)->tk_sleeping)
+#define RPC_IS_ACTIVATED(t)	((t)->tk_active)
 
 /*
  * RPC synchronization objects
@@ -110,6 +119,7 @@
 #endif
 };
 
+#define RPC_WAITQ_EMPTY(q)	((q)->task == NULL)
 #ifndef RPC_DEBUG
 # define RPC_INIT_WAITQ(name)	((struct rpc_wait_queue) { NULL })
 #else
@@ -125,13 +135,17 @@
 					rpc_action exitfunc, int flags);
 void		rpc_release_task(struct rpc_task *);
 void		rpc_killall_tasks(struct rpc_clnt *);
-void		rpc_execute(struct rpc_task *);
+int		rpc_execute(struct rpc_task *);
 void		rpc_run_child(struct rpc_task *parent, struct rpc_task *child,
 					rpc_action action);
-int		rpc_add_wait_queue(struct rpc_wait_queue *, struct rpc_task *);
+int		rpc_add_wait_queue(struct rpc_wait_queue *,
+				     struct rpc_task *);
+void		__rpc_remove_wait_queue(struct rpc_task *);
 void		rpc_remove_wait_queue(struct rpc_task *);
 void		rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
 					rpc_action action, rpc_action timer);
+void		rpc_sleep_locked(struct rpc_wait_queue *, struct rpc_task *,
+				 rpc_action action, rpc_action timer);
 void		rpc_cond_wait(struct rpc_wait_queue *, struct rpc_task *,
 					unsigned char *,
 					rpc_action action, rpc_action timer);
@@ -139,6 +153,8 @@
 void		rpc_wake_up(struct rpc_wait_queue *);
 struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *);
 void		rpc_wake_up_status(struct rpc_wait_queue *, int);
+int		rpc_lock_task(struct rpc_task *);
+void		rpc_unlock_task(struct rpc_task *);
 void		rpc_add_timer(struct rpc_task *, rpc_action);
 void		rpc_del_timer(struct rpc_task *);
 void		rpc_delay(struct rpc_task *, unsigned long);
@@ -165,11 +181,18 @@
 	task->tk_action = NULL;
 }
 
+extern __inline__ void
+rpc_kill(struct rpc_task *task, int status)	/* rpc_exit() the task with status, then wake it */
+{
+        rpc_exit(task, status);
+        rpc_wake_up_task(task);	/* wake-up is issued only after rpc_exit() has been applied */
+}
+
 #ifdef RPC_DEBUG
 extern __inline__ char *
 rpc_qname(struct rpc_wait_queue *q)
 {
-	return q->name? q->name : "unknown";
+	return q? (q->name? q->name : "unknown") : "none";
 }
 #endif
 
--- linux/include/linux/sunrpc/types.h.nfsattack-gafton	Tue May 11 13:35:44 1999
+++ linux/include/linux/sunrpc/types.h	Fri Feb  4 23:26:34 2000
@@ -58,6 +58,7 @@
 	}
 	if (*q == item)
 		*q = next;
+	item->prev = item->next = 0;
 }
 
 #define rpc_insert_list(q, i) \
--- linux/include/linux/sunrpc/xdr.h.nfsattack-gafton	Mon Dec 28 01:18:30 1998
+++ linux/include/linux/sunrpc/xdr.h	Fri Feb  4 23:26:34 2000
@@ -1,7 +1,7 @@
 /*
  * include/linux/sunrpc/xdr.h
  *
- * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
  */
 
 #ifndef _SUNRPC_XDR_H_
@@ -59,13 +59,32 @@
 /*
  * Miscellaneous XDR helper functions
  */
-u32 *	xdr_encode_string(u32 *p, const char *s);
+u32 *	xdr_encode_string(u32 *p, const char *s, int len);
 u32 *	xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen);
 u32 *	xdr_encode_netobj(u32 *p, const struct xdr_netobj *);
 u32 *	xdr_decode_netobj(u32 *p, struct xdr_netobj *);
 u32 *	xdr_decode_netobj_fixed(u32 *p, void *obj, unsigned int len);
 
 /*
+ * Encode and decode 64bit quantities (NFSv3 support)
+ */
+static inline u32 *	/* returns p advanced past the two words written */
+xdr_encode_hyper(u32 *p, __u64 val)	/* store val at p as two 32-bit words */
+{
+	*p++ = htonl(val >> 32);	/* upper 32 bits first, converted with htonl() */
+	*p++ = htonl(val & 0xFFFFFFFF);	/* then the lower 32 bits */
+	return p;
+}
+
+static inline u32 *	/* returns p advanced past the two words consumed */
+xdr_decode_hyper(u32 *p, __u64 *valp)	/* rebuild a 64-bit value from two 32-bit words into *valp */
+{
+	*valp  = ((__u64) ntohl(*p++)) << 32;	/* first word becomes the upper 32 bits */
+	*valp |= ntohl(*p++);	/* second word fills the lower 32 bits */
+	return p;
+}
+
+/*
  * Adjust iovec to reflect end of xdr'ed data (RPC client XDR)
  */
 static inline int
@@ -73,6 +92,9 @@
 {
 	return iov->iov_len = ((u8 *) p - (u8 *) iov->iov_base);
 }
+
+void xdr_shift_iovec(struct iovec *, int, size_t);
+void xdr_zero_iovec(struct iovec *, int, size_t);
 
 #endif /* __KERNEL__ */
 
--- linux/include/linux/sunrpc/xprt.h.nfsattack-gafton	Tue Jan  4 13:12:25 2000
+++ linux/include/linux/sunrpc/xprt.h	Fri Feb  4 23:26:34 2000
@@ -17,7 +17,7 @@
 /*
  * Maximum number of iov's we use.
  */
-#define MAX_IOVEC	8
+#define MAX_IOVEC	10
 
 /*
  * The transport code maintains an estimate on the maximum number of out-
@@ -44,10 +44,10 @@
 #define RPC_MAXCWND		(RPC_MAXCONG * RPC_CWNDSCALE)
 #define RPC_INITCWND		RPC_CWNDSCALE
 #define RPCXPRT_CONGESTED(xprt) \
-	((xprt)->cong >= ((xprt)->nocong? RPC_MAXCWND : (xprt)->cwnd))
+	((xprt)->cong >= (xprt)->cwnd)
 
 /* Default timeout values */
-#define RPC_MAX_UDP_TIMEOUT	(6*HZ)
+#define RPC_MAX_UDP_TIMEOUT	(60*HZ)
 #define RPC_MAX_TCP_TIMEOUT	(600*HZ)
 
 /* RPC call and reply header size as number of 32bit words (verifier
@@ -96,7 +96,6 @@
 	struct rpc_task *	rq_task;	/* RPC task data */
 	__u32			rq_xid;		/* request XID */
 	struct rpc_rqst *	rq_next;	/* free list */
-	unsigned char		rq_gotit;	/* reply received */
 	unsigned char		rq_damaged;	/* being received */
 
 	/*
@@ -122,14 +121,10 @@
 #define rq_rlen			rq_rcv_buf.io_len
 
 struct rpc_xprt {
-	struct rpc_xprt *	link;		/* list of all clients */
 	struct rpc_xprt *	rx_pending;	/* receive pending list */
-	struct rpc_xprt *	tx_pending;	/* transmit pending list */
 	
-	int 			rx_pending_flag;/* are we on the rcv pending list ? */
-	int 			tx_pending_flag;/* are we on the xmit pending list ? */
+	int 			rx_pending_flag;/* are we on the pending list ? */
 
-	struct file *		file;		/* VFS layer */
 	struct socket *		sock;		/* BSD socket layer */
 	struct sock *		inet;		/* INET layer */
 
@@ -147,9 +142,9 @@
 	struct rpc_wait_queue	reconn;		/* waiting for reconnect */
 	struct rpc_rqst *	free;		/* free slots */
 	struct rpc_rqst		slot[RPC_MAXREQS];
-	unsigned char		connected;	/* TCP: connected */
-	unsigned char		write_space;	/* TCP: can send */
-	unsigned int		shutdown   : 1,	/* being shut down */
+	unsigned int		connected  : 1,	/* TCP: connected */
+				write_space: 1,	/* TCP: can send */
+				shutdown   : 1,	/* being shut down */
 				nocong	   : 1,	/* no congestion control */
 				stream     : 1,	/* TCP */
 				tcp_more   : 1,	/* more record fragments */
@@ -170,25 +165,22 @@
 	u32			tcp_copied;	/* copied to request */
 
 	/*
-	 * TCP send stuff
+	 * Send stuff
 	 */
-	struct rpc_iov		snd_buf;	/* send buffer */
 	struct rpc_task *	snd_task;	/* Task blocked in send */
-	u32			snd_sent;	/* Bytes we have sent */
 
 
 	void			(*old_data_ready)(struct sock *, int);
 	void			(*old_state_change)(struct sock *);
 	void			(*old_write_space)(struct sock *);
+
+	struct wait_queue *	cong_wait;
 };
 #define tcp_reclen		tcp_recm.header[0]
 #define tcp_xid			tcp_recm.header[1]
 
 #ifdef __KERNEL__
 
-struct rpc_xprt *	xprt_create(struct file *socket,
-					struct sockaddr_in *addr,
-					struct rpc_timeout *toparms);
 struct rpc_xprt *	xprt_create_proto(int proto, struct sockaddr_in *addr,
 					struct rpc_timeout *toparms);
 int			xprt_destroy(struct rpc_xprt *);
@@ -202,6 +194,9 @@
 int			xprt_adjust_timeout(struct rpc_timeout *);
 void			xprt_release(struct rpc_task *);
 void			xprt_reconnect(struct rpc_task *);
+int			xprt_clear_backlog(struct rpc_xprt *);
+
+int			xprt_tcp_pending(void);
 
 #endif /* __KERNEL__*/
 
--- linux/include/linux/dcache.h.nfsattack-gafton	Tue Jan  4 13:12:24 2000
+++ linux/include/linux/dcache.h	Fri Feb  4 23:26:34 2000
@@ -151,7 +151,7 @@
 /* test whether root is busy without destroying dcache */
 extern int is_root_busy(struct dentry *);
 
-/* test whether we have any submounts in a subdir tree */
+/* test whether we have any submounts */
 extern int have_submounts(struct dentry *);
 
 /*
--- linux/include/linux/errno.h.nfsattack-gafton	Thu Jun 12 18:28:32 1997
+++ linux/include/linux/errno.h	Fri Feb  4 23:26:34 2000
@@ -11,6 +11,16 @@
 #define ERESTARTNOHAND	514	/* restart if no handler.. */
 #define ENOIOCTLCMD	515	/* No ioctl command */
 
+/* Defined for the NFSv3 protocol */
+#define EBADHANDLE	521	/* Illegal NFS file handle */
+#define ENOTSYNC	522	/* Update synchronization mismatch */
+#define EBADCOOKIE	523	/* Cookie is stale */
+#define ENOTSUPP	524	/* Operation is not supported */
+#define ETOOSMALL	525	/* Buffer or request is too small */
+#define ESERVERFAULT	526	/* An untranslatable error occurred */
+#define EBADTYPE	527	/* Type not supported by server */
+#define EJUKEBOX	528	/* Request initiated, but will not complete before timeout */
+
 #endif
 
 #endif
--- linux/include/linux/fs.h.nfsattack-gafton	Fri Feb  4 23:26:34 2000
+++ linux/include/linux/fs.h	Fri Feb  4 23:26:34 2000
@@ -463,6 +463,8 @@
 	off_t fl_end;
 
 	void (*fl_notify)(struct file_lock *);	/* unblock callback */
+	void (*fl_insert)(struct file_lock *);	/* lock insertion callback */
+	void (*fl_remove)(struct file_lock *);	/* lock removal callback */
 
 	union {
 		struct nfs_lock_info	nfs_fl;
@@ -616,7 +618,7 @@
 	void (*truncate) (struct inode *);
 	int (*permission) (struct inode *, int);
 	int (*smap) (struct inode *,int);
-	int (*updatepage) (struct file *, struct page *, unsigned long, unsigned int, int);
+	int (*updatepage) (struct file *, struct page *, const char *, unsigned long, unsigned int, int);
 	int (*revalidate) (struct dentry *);
 };
 
--- linux/include/linux/nfs.h.nfsattack-gafton	Sat Mar  6 17:21:13 1999
+++ linux/include/linux/nfs.h	Fri Feb  4 23:26:34 2000
@@ -1,54 +1,60 @@
 /*
  * NFS protocol definitions
+ *
+ * This file contains constants mostly for Version 2 of the protocol,
+ * but also has a couple of NFSv3 bits in (notably the error codes).
  */
 #ifndef _LINUX_NFS_H
 #define _LINUX_NFS_H
 
 #include <linux/sunrpc/msg_prot.h>
 
-#define NFS_PORT	2049
-#define NFS_MAXDATA	8192
-#define NFS_MAXPATHLEN	1024
-#define NFS_MAXNAMLEN	255
-#define NFS_MAXGROUPS	16
-#define NFS_FHSIZE	32
-#define NFS_COOKIESIZE	4
-#define NFS_FIFO_DEV	(-1)
-#define NFSMODE_FMT	0170000
-#define NFSMODE_DIR	0040000
-#define NFSMODE_CHR	0020000
-#define NFSMODE_BLK	0060000
-#define NFSMODE_REG	0100000
-#define NFSMODE_LNK	0120000
-#define NFSMODE_SOCK	0140000
-#define NFSMODE_FIFO	0010000
+#define NFS_PROGRAM		100003
 
-	
+/*
+ * NFS status codes. The good thing with these values is that NFSv3 errors are
+ * a superset of NFSv2 errors (with the exception of NFSERR_WFLUSH which
+ * no-one uses anyway), so we can happily mix code as long as we make sure
+ * no NFSv3 errors are returned to NFSv2 clients.
+ * Error codes that have a `--' in the v2 column are not part of the
+ * standard, but seem to be widely used nevertheless.
+ */
 enum nfs_stat {
-	NFS_OK = 0,
-	NFSERR_PERM = 1,
-	NFSERR_NOENT = 2,
-	NFSERR_IO = 5,
-	NFSERR_NXIO = 6,
-	NFSERR_EAGAIN = 11,
-	NFSERR_ACCES = 13,
-	NFSERR_EXIST = 17,
-	NFSERR_XDEV = 18,
-	NFSERR_NODEV = 19,
-	NFSERR_NOTDIR = 20,
-	NFSERR_ISDIR = 21,
-	NFSERR_INVAL = 22,	/* that Sun forgot */
-	NFSERR_FBIG = 27,
-	NFSERR_NOSPC = 28,
-	NFSERR_ROFS = 30,
-	NFSERR_OPNOTSUPP = 45,
-	NFSERR_NAMETOOLONG = 63,
-	NFSERR_NOTEMPTY = 66,
-	NFSERR_DQUOT = 69,
-	NFSERR_STALE = 70,
-	NFSERR_WFLUSH = 99
+	NFS_OK = 0,			/* v2 v3 */
+	NFSERR_PERM = 1,		/* v2 v3 */
+	NFSERR_NOENT = 2,		/* v2 v3 */
+	NFSERR_IO = 5,			/* v2 v3 */
+	NFSERR_NXIO = 6,		/* v2 v3 */
+	NFSERR_EAGAIN = 11,		/* v2 v3 */
+	NFSERR_ACCES = 13,		/* v2 v3 */
+	NFSERR_EXIST = 17,		/* v2 v3 */
+	NFSERR_XDEV = 18,		/*    v3 */
+	NFSERR_NODEV = 19,		/* v2 v3 */
+	NFSERR_NOTDIR = 20,		/* v2 v3 */
+	NFSERR_ISDIR = 21,		/* v2 v3 */
+	NFSERR_INVAL = 22,		/* v2 v3 that Sun forgot */
+	NFSERR_FBIG = 27,		/* v2 v3 */
+	NFSERR_NOSPC = 28,		/* v2 v3 */
+	NFSERR_ROFS = 30,		/* v2 v3 */
+	NFSERR_MLINK = 31,		/*    v3 */
+	NFSERR_OPNOTSUPP = 45,		/* v2 v3 */
+	NFSERR_NAMETOOLONG = 63,	/* v2 v3 */
+	NFSERR_NOTEMPTY = 66,		/* v2 v3 */
+	NFSERR_DQUOT = 69,		/* v2 v3 */
+	NFSERR_STALE = 70,		/* v2 v3 */
+	NFSERR_REMOTE = 71,		/* v2 v3 */
+	NFSERR_WFLUSH = 99,		/* v2    */
+	NFSERR_BADHANDLE = 10001,	/*    v3 */
+	NFSERR_NOT_SYNC = 10002,	/*    v3 */
+	NFSERR_BAD_COOKIE = 10003,	/*    v3 */
+	NFSERR_NOTSUPP = 10004,		/*    v3 */
+	NFSERR_TOOSMALL = 10005,	/*    v3 */
+	NFSERR_SERVERFAULT = 10006,	/*    v3 */
+	NFSERR_BADTYPE = 10007,		/*    v3 */
+	NFSERR_JUKEBOX = 10008		/*    v3 */
 };
 
+/* NFSv2 file types - beware, these are not the same in NFSv3 */
 enum nfs_ftype {
 	NFNON = 0,
 	NFREG = 1,
@@ -61,166 +67,149 @@
 	NFFIFO = 8
 };
 
-struct nfs_fh {
-	char			data[NFS_FHSIZE];
-};
 
-#define NFS_PROGRAM		100003
-#define NFS_VERSION		2
-#define NFSPROC_NULL		0
-#define NFSPROC_GETATTR		1
-#define NFSPROC_SETATTR		2
-#define NFSPROC_ROOT		3
-#define NFSPROC_LOOKUP		4
-#define NFSPROC_READLINK	5
-#define NFSPROC_READ		6
-#define NFSPROC_WRITECACHE	7
-#define NFSPROC_WRITE		8
-#define NFSPROC_CREATE		9
-#define NFSPROC_REMOVE		10
-#define NFSPROC_RENAME		11
-#define NFSPROC_LINK		12
-#define NFSPROC_SYMLINK		13
-#define NFSPROC_MKDIR		14
-#define NFSPROC_RMDIR		15
-#define NFSPROC_READDIR		16
-#define NFSPROC_STATFS		17
-
-/* Mount support for NFSroot */
 #ifdef __KERNEL__
+/* 
+ * Mount support for NFSroot
+ */
 #define NFS_MNT_PROGRAM		100005
 #define NFS_MNT_VERSION		1
 #define NFS_MNT_PORT		627
 #define NFS_MNTPROC_MNT		1
 #define NFS_MNTPROC_UMNT	3
-#endif
+
+/*
+ * This is really a general kernel constant, but since nothing like
+ * this is defined in the kernel headers, I have to do it here.
+ */
+#define NFS_OFFSET_MAX		((__s64)((~(__u64)0) >> 1))
+
+#endif /* __KERNEL__ */
 
 #if defined(__KERNEL__) || defined(NFS_NEED_KERNEL_TYPES)
 
-extern struct rpc_program	nfs_program;
-extern struct rpc_stat		nfs_rpcstat;
+/*
+ * These data types are used exclusively by the NFS client implementation.
+ * They support both NFSv2 and NFSv3.
+ */
 
-struct nfs_time {
-	__u32			seconds;
-	__u32			useconds;
+/*
+ * This is the kernel NFS client file handle representation
+ */
+#define NFS_MAXFHSIZE		64
+struct nfs_fh {
+	unsigned short		size;
+	unsigned char		data[NFS_MAXFHSIZE];
 };
 
 struct nfs_fattr {
-	enum nfs_ftype		type;
+	unsigned short          valid;		/* which fields are valid */
+	__u64                   pre_size;	/* pre_op_attr.size       */
+	__u64			pre_mtime;	/* pre_op_attr.mtime */
+	__u64			pre_ctime;	/* pre_op_attr.ctime */
+	enum nfs_ftype		type;           /* always use NFSv2 types */
 	__u32			mode;
 	__u32			nlink;
 	__u32			uid;
 	__u32			gid;
-	__u32			size;
-	__u32			blocksize;
+	__u64			size;
+	union {
+		struct {
+			__u32	blocksize;
+			__u32	blocks;
+		} nfs2;
+		struct {
+			__u64   used;
+		} nfs3;
+	} du;
 	__u32			rdev;
-	__u32			blocks;
-	__u32			fsid;
-	__u32			fileid;
-	struct nfs_time		atime;
-	struct nfs_time		mtime;
-	struct nfs_time		ctime;
+	__u64			fsid;
+	__u64			fileid;
+	__u64		        atime;
+        __u64		        mtime;
+	__u64		        ctime;
 };
 
-struct nfs_sattr {
-	__u32			mode;
-	__u32			uid;
-	__u32			gid;
-	__u32			size;
-	struct nfs_time		atime;
-	struct nfs_time		mtime;
-};
 
-struct nfs_fsinfo {
-	__u32			tsize;
-	__u32			bsize;
-	__u32			blocks;
-	__u32			bfree;
-	__u32			bavail;
-};
 
-struct nfs_writeargs {
-	struct nfs_fh *		fh;
-	__u32			offset;
-	__u32			count;
-	const void *		buffer;
-};
-
-#ifdef NFS_NEED_XDR_TYPES
+#define NFS_ATTR_WCC		0x0001		/* pre-op WCC data */
+#define NFS_ATTR_FATTR		0x0002		/* post-op attributes */
+#define NFS_ATTR_FATTR_V3	0x0004		/* NFSv3 attributes */
 
-struct nfs_sattrargs {
-	struct nfs_fh *		fh;
-	struct nfs_sattr *	sattr;
+/*
+ * Info on the file system
+ */
+struct nfs_fsinfo {
+	__u32			rtmax;	/* max.  read transfer size */
+	__u32			rtpref;	/* pref. read transfer size */
+	__u32			rtmult;	/* reads should be multiple of this */
+	__u32			wtmax;	/* max.  write transfer size */
+	__u32			wtpref;	/* pref. write transfer size */
+	__u32			wtmult;	/* writes should be multiple of this */
+	__u32			dtpref;	/* pref. readdir transfer size */
+	__u64			maxfilesize;
+	__u64			bsize;	/* block size */
+	__u64			tbytes;	/* total size in bytes */
+	__u64			fbytes;	/* # of free bytes */
+	__u64			abytes;	/* # of bytes available to user */
+	__u64			tfiles;	/* # of files */
+	__u64			ffiles;	/* # of free files */
+	__u64			afiles;	/* # of files available to user */
+	__u32			linkmax;/* max # of hard links */
+	__u32			namelen;/* max name length */
 };
 
-struct nfs_diropargs {
-	struct nfs_fh *		fh;
-	const char *		name;
-};
+/* Arguments to the read call.
+ * Note that NFS_READ_MAXIOV must be <= (MAX_IOVEC-2) from sunrpc/xprt.h
+ */
+#define NFS_READ_MAXIOV 8
 
 struct nfs_readargs {
 	struct nfs_fh *		fh;
-	__u32			offset;
+	__u64			offset;
 	__u32			count;
-	void *			buffer;
+	unsigned int            nriov;
+	struct iovec            iov[NFS_READ_MAXIOV];
 };
 
-struct nfs_createargs {
-	struct nfs_fh *		fh;
-	const char *		name;
-	struct nfs_sattr *	sattr;
-};
-
-struct nfs_renameargs {
-	struct nfs_fh *		fromfh;
-	const char *		fromname;
-	struct nfs_fh *		tofh;
-	const char *		toname;
-};
-
-struct nfs_linkargs {
-	struct nfs_fh *		fromfh;
-	struct nfs_fh *		tofh;
-	const char *		toname;
+struct nfs_readres {
+	struct nfs_fattr *	fattr;
+	unsigned int		count;
+	int                     eof;
 };
 
-struct nfs_symlinkargs {
-	struct nfs_fh *		fromfh;
-	const char *		fromname;
-	const char *		topath;
-	struct nfs_sattr *	sattr;
-};
+/* Arguments to the write call.
+ * Note that NFS_WRITE_MAXIOV must be <= (MAX_IOVEC-2) from sunrpc/xprt.h
+ */
+#define NFS_WRITE_MAXIOV        8
 
-struct nfs_readdirargs {
-	struct nfs_fh *		fh;
-	__u32			cookie;
-	void *			buffer;
-	unsigned int		bufsiz;
-};
+enum nfs3_stable_how {
+                  NFS_UNSTABLE = 0,
+                  NFS_DATA_SYNC = 1,
+                  NFS_FILE_SYNC = 2
+              };
 
-struct nfs_diropok {
+struct nfs_writeargs {
 	struct nfs_fh *		fh;
-	struct nfs_fattr *	fattr;
+	__u64			offset;
+	__u32			count;
+	int                     stable;
+	unsigned int            nriov;
+	struct iovec            iov[NFS_WRITE_MAXIOV];
 };
 
-struct nfs_readres {
-	struct nfs_fattr *	fattr;
-	unsigned int		count;
+struct nfs_writeverf {
+	int                     committed;
+	__u32                   verifier[2];
 };
 
-struct nfs_readlinkres {
-	char **			string;
-	unsigned int *		lenp;
-	unsigned int		maxlen;
-	void *			buffer;
+struct nfs_writeres {
+        struct nfs_fattr *      fattr;
+        struct nfs_writeverf *  verf;
+        __u32                   count;
 };
 
-struct nfs_readdirres {
-	void *			buffer;
-	unsigned int		bufsiz;
-};
+#endif /* __KERNEL__ || NFS_NEED_KERNEL_TYPES */
 
-#endif /* NFS_NEED_XDR_TYPES */
-#endif /* __KERNEL__ */
 
 #endif
--- linux/include/linux/nfs2.h.nfsattack-gafton	Fri Feb  4 23:26:34 2000
+++ linux/include/linux/nfs2.h	Fri Feb  4 23:26:34 2000
@@ -0,0 +1,153 @@
+/*
+ * NFS protocol definitions
+ *
+ * This file contains constants for Version 2 of the protocol.
+ */
+#ifndef _LINUX_NFS2_H
+#define _LINUX_NFS2_H
+
+#define NFS2_PORT	2049
+#define NFS2_MAXDATA	8192
+#define NFS2_MAXPATHLEN	1024
+#define NFS2_MAXNAMLEN	255
+#define NFS2_MAXGROUPS	16
+#define NFS2_FHSIZE	32
+#define NFS2_COOKIESIZE	4
+#define NFS2_FIFO_DEV	(-1)
+#define NFS2MODE_FMT	0170000
+#define NFS2MODE_DIR	0040000
+#define NFS2MODE_CHR	0020000
+#define NFS2MODE_BLK	0060000
+#define NFS2MODE_REG	0100000
+#define NFS2MODE_LNK	0120000
+#define NFS2MODE_SOCK	0140000
+#define NFS2MODE_FIFO	0010000
+
+	
+
+
+/* NFSv2 file types - beware, these are not the same in NFSv3 */
+enum nfs2_ftype {
+	NF2NON = 0,
+	NF2REG = 1,
+	NF2DIR = 2,
+	NF2BLK = 3,
+	NF2CHR = 4,
+	NF2LNK = 5,
+	NF2SOCK = 6,
+	NF2BAD = 7,
+	NF2FIFO = 8
+};
+
+struct nfs2_fh {
+	char			data[NFS2_FHSIZE];
+};
+
+/*
+ * Procedure numbers for NFSv2
+ */
+#define NFS2_VERSION		2
+#define NFSPROC_NULL		0
+#define NFSPROC_GETATTR		1
+#define NFSPROC_SETATTR		2
+#define NFSPROC_ROOT		3
+#define NFSPROC_LOOKUP		4
+#define NFSPROC_READLINK	5
+#define NFSPROC_READ		6
+#define NFSPROC_WRITECACHE	7
+#define NFSPROC_WRITE		8
+#define NFSPROC_CREATE		9
+#define NFSPROC_REMOVE		10
+#define NFSPROC_RENAME		11
+#define NFSPROC_LINK		12
+#define NFSPROC_SYMLINK		13
+#define NFSPROC_MKDIR		14
+#define NFSPROC_RMDIR		15
+#define NFSPROC_READDIR		16
+#define NFSPROC_STATFS		17
+
+#define NFS_MNT_PROGRAM		100005
+#define NFS_MNT_VERSION		1
+#define MNTPROC_NULL		0
+#define MNTPROC_MNT		1
+#define MNTPROC_UMNT		3
+#define MNTPROC_UMNTALL		4
+
+/*
+ * The following types are for NFSv2 only.
+ */
+#if (defined(__KERNEL__) || defined (NFS_NEED_KERNEL_TYPES)) && defined(NFS_NEED_NFS2_XDR_TYPES)
+struct nfs_sattrargs {
+	struct nfs_fh *		fh;
+	struct iattr *	        sattr;
+};
+
+struct nfs_diropargs {
+	struct nfs_fh *		fh;
+	const char *		name;
+	int                     len;
+};
+
+
+struct nfs_createargs {
+	struct nfs_fh *		fh;
+	const char *		name;
+	int                     len;
+	struct iattr *	        sattr;
+};
+
+struct nfs_renameargs {
+	struct nfs_fh *		fromfh;
+	const char *		fromname;
+	int                     fromlen;
+	struct nfs_fh *		tofh;
+	const char *		toname;
+	int                     tolen;
+};
+
+struct nfs_linkargs {
+	struct nfs_fh *		fromfh;
+	struct nfs_fh *		tofh;
+	const char *		toname;
+	int                     tolen;
+};
+
+struct nfs_symlinkargs {
+	struct nfs_fh *		fromfh;
+	const char *		fromname;
+	int                     fromlen;
+	const char *		topath;
+	int                     tolen;
+	struct iattr *	        sattr;
+};
+
+struct nfs_readdirargs {
+	struct nfs_fh *		fh;
+	__u32			cookie;
+	void *			buffer;
+	unsigned int		bufsiz;
+};
+
+struct nfs_diropok {
+	struct nfs_fh *		fh;
+	struct nfs_fattr *	fattr;
+};
+
+struct nfs_readlinkargs {
+	struct nfs_fh *		fh;
+	void *			buffer;
+	unsigned int		bufsiz;
+};
+
+struct nfs_readlinkres {
+	void *			buffer;
+	unsigned int		bufsiz;
+};
+
+struct nfs_readdirres {
+	void *			buffer;
+	unsigned int		bufsiz;
+};
+#endif /* NFS_NEED_NFS2_XDR_TYPES */
+
+#endif
--- linux/include/linux/nfs3.h.nfsattack-gafton	Mon Apr  7 14:35:31 1997
+++ linux/include/linux/nfs3.h	Fri Feb  4 23:26:34 2000
@@ -4,15 +4,12 @@
 #ifndef _LINUX_NFS3_H
 #define _LINUX_NFS3_H
 
-#include <linux/sunrpc/msg_prot.h>
-#include <linux/nfs.h>
-
 #define NFS3_PORT		2049
-#define NFS3_MAXDATA		8192
+#define NFS3_MAXDATA		32768
 #define NFS3_MAXPATHLEN		PATH_MAX
 #define NFS3_MAXNAMLEN		NAME_MAX
 #define NFS3_MAXGROUPS		16
-#define NFS3_FHSIZE		NFS_FHSIZE
+#define NFS3_FHSIZE		64
 #define NFS3_COOKIESIZE		4
 #define NFS3_FIFO_DEV		(-1)
 #define NFS3MODE_FMT		0170000
@@ -24,39 +21,28 @@
 #define NFS3MODE_SOCK		0140000
 #define NFS3MODE_FIFO		0010000
 
-	
-enum nfs3_stat {
-	NFS3_OK			= 0,
-	NFS3ERR_PERM		= 1,
-	NFS3ERR_NOENT		= 2,
-	NFS3ERR_IO		= 5,
-	NFS3ERR_NXIO		= 6,
-	NFS3ERR_EAGAIN		= 11,
-	NFS3ERR_ACCES		= 13,
-	NFS3ERR_EXIST		= 17,
-	NFS3ERR_XDEV		= 18,	/* new in NFSv3 */
-	NFS3ERR_NODEV		= 19,
-	NFS3ERR_NOTDIR		= 20,
-	NFS3ERR_ISDIR		= 21,
-	NFS3ERR_INVAL		= 22,	/* new in NFSv3 */
-	NFS3ERR_FBIG		= 27,
-	NFS3ERR_NOSPC		= 28,
-	NFS3ERR_ROFS		= 30,
-	NFS3ERR_MLINK		= 31,	/* new in NFSv3 */
-	NFS3ERR_NAMETOOLONG	= 63,
-	NFS3ERR_NOTEMPTY	= 66,
-	NFS3ERR_DQUOT		= 69,
-	NFS3ERR_STALE		= 70,
-	NFS3ERR_REMOTE		= 71,	/* new in NFSv3 */
-	NFS3ERR_BADHANDLE	= 10001,/* ditto */
-	NFS3ERR_NOT_SYNC	= 10002,/* ditto */
-	NFS3ERR_BAD_COOKIE	= 10003,/* ditto */
-	NFS3ERR_NOTSUPP		= 10004,/* ditto */
-	NFS3ERR_TOOSMALL	= 10005,/* ditto */
-	NFS3ERR_SERVERFAULT	= 10006,/* ditto */
-	NFS3ERR_BADTYPE		= 10007,/* ditto */
-	NFS3ERR_JUKEBOX		= 10008,/* ditto */
-};
+/* Flags for access() call */
+#define NFS3_ACCESS_READ	0x0001
+#define NFS3_ACCESS_LOOKUP	0x0002
+#define NFS3_ACCESS_MODIFY	0x0004
+#define NFS3_ACCESS_EXTEND	0x0008
+#define NFS3_ACCESS_DELETE	0x0010
+#define NFS3_ACCESS_EXECUTE	0x0020
+
+/* Flags for create mode */
+#define NFS3_CREATE_UNCHECKED	0
+#define NFS3_CREATE_GUARDED	1
+#define NFS3_CREATE_EXCLUSIVE	2
+
+/* NFSv3 file system properties */
+#define NFS3_FSF_LINK		0x0001
+#define NFS3_FSF_SYMLINK	0x0002
+#define NFS3_FSF_HOMOGENEOUS	0x0008
+#define NFS3_FSF_CANSETTIME	0x0010
+/* Some shorthands. See fs/nfsd/nfs3proc.c */
+#define NFS3_FSF_DEFAULT	0x001B
+#define NFS3_FSF_BILLYBOY	0x0018
+#define NFS3_FSF_READONLY	0x0008
 
 enum nfs3_ftype {
 	NF3NON  = 0,
@@ -71,182 +57,164 @@
 };
 
 #define NFS3_VERSION		3
-#define NFSPROC_NULL		0
-#define NFSPROC_GETATTR		1
-#define NFSPROC_SETATTR		2
-#define NFSPROC_ROOT		3
-#define NFSPROC_LOOKUP		4
-#define NFSPROC_READLINK	5
-#define NFSPROC_READ		6
-#define NFSPROC_WRITECACHE	7
-#define NFSPROC_WRITE		8
-#define NFSPROC_CREATE		9
-#define NFSPROC_REMOVE		10
-#define NFSPROC_RENAME		11
-#define NFSPROC_LINK		12
-#define NFSPROC_SYMLINK		13
-#define NFSPROC_MKDIR		14
-#define NFSPROC_RMDIR		15
-#define NFSPROC_READDIR		16
-#define NFSPROC_STATFS		17
+#define NFS3PROC_NULL		0
+#define NFS3PROC_GETATTR	1
+#define NFS3PROC_SETATTR	2
+#define NFS3PROC_LOOKUP		3
+#define NFS3PROC_ACCESS		4
+#define NFS3PROC_READLINK	5
+#define NFS3PROC_READ		6
+#define NFS3PROC_WRITE		7
+#define NFS3PROC_CREATE		8
+#define NFS3PROC_MKDIR		9
+#define NFS3PROC_SYMLINK	10
+#define NFS3PROC_MKNOD		11
+#define NFS3PROC_REMOVE		12
+#define NFS3PROC_RMDIR		13
+#define NFS3PROC_RENAME		14
+#define NFS3PROC_LINK		15
+#define NFS3PROC_READDIR	16
+#define NFS3PROC_READDIRPLUS	17
+#define NFS3PROC_FSSTAT		18
+#define NFS3PROC_FSINFO		19
+#define NFS3PROC_PATHCONF	20
+#define NFS3PROC_COMMIT		21
+
+#define NFS_MNT3_PROGRAM	100005
+#define NFS_MNT3_VERSION	3
+#define MOUNTPROC3_NULL		0
+#define MOUNTPROC3_MNT		1
+#define MOUNTPROC3_UMNT		3
+#define MOUNTPROC3_UMNTALL	4
+ 
 
 #if defined(__KERNEL__) || defined(NFS_NEED_KERNEL_TYPES)
 
-struct nfs3_fh {
-	__u32			size;
-	__u8			data[NFS3_FHSIZE];
-};
-
-struct nfs3_fattr {
-	enum nfs3_ftype		type;
-	__u32			mode;
-	__u32			nlink;
-	__u32			uid;
-	__u32			gid;
-	__u64			size;
-	__u64			used;
-	__u32			rdev_maj;
-	__u32			rdev_min;
-	__u32			fsid;
-	__u32			fileid;
-	struct nfs_time		atime;
-	struct nfs_time		mtime;
-	struct nfs_time		ctime;
-};
-
-struct nfs3_wcc_attr {
-	__u64			size;
-	struct nfs_time		mtime;
-	struct nfs_time		ctime;
-};
-
-struct nfs3_wcc_data {
-	struct nfs3_wcc_attr	before;
-	struct nfs3_wcc_attr	after;
-};
-
-struct nfs3_sattr {
-	__u32			valid;
-	__u32			mode;
-	__u32			uid;
-	__u32			gid;
-	__u64			size;
-	struct nfs_time		atime;
-	struct nfs_time		mtime;
-};
-
-struct nfs3_entry {
-	__u32			fileid;
-	char *			name;
-	unsigned int		length;
-	__u32			cookie;
-	__u32			eof;
-};
-
-struct nfs3_fsinfo {
-	__u32			tsize;
-	__u32			bsize;
-	__u32			blocks;
-	__u32			bfree;
-	__u32			bavail;
-};
+/* Number of 32bit words in post_op_attr */
+#define NFS3_POST_OP_ATTR_WORDS		22
 
-#ifdef NFS_NEED_XDR_TYPES
+#ifdef NFS_NEED_NFS3_XDR_TYPES
 
 struct nfs3_sattrargs {
 	struct nfs_fh *		fh;
-	struct nfs_sattr *	sattr;
+	struct iattr *		sattr;
+	unsigned int		guard;
+	time_t			guardtime;
 };
 
 struct nfs3_diropargs {
 	struct nfs_fh *		fh;
 	const char *		name;
+	int			len;
 };
 
-struct nfs3_readargs {
+struct nfs3_accessargs {
 	struct nfs_fh *		fh;
-	__u32			offset;
-	__u32			count;
-	void *			buffer;
+	__u32			access;
 };
 
-struct nfs3_writeargs {
+struct nfs3_createargs {
 	struct nfs_fh *		fh;
-	__u32			offset;
-	__u32			count;
-	const void *		buffer;
+	const char *		name;
+	int			len;
+	struct iattr *		sattr;
+	unsigned int		createmode;
+	__u32			verifier[2];
 };
 
-struct nfs3_createargs {
+struct nfs3_mkdirargs {
 	struct nfs_fh *		fh;
 	const char *		name;
-	struct nfs_sattr *	sattr;
+	int			len;
+	struct iattr *		sattr;
+};
+
+struct nfs3_symlinkargs {
+	struct nfs_fh *		fromfh;
+	const char *		fromname;
+	int			fromlen;
+	const char *		topath;
+	int			tolen;
+	struct iattr *		sattr;
+};
+
+struct nfs3_mknodargs {
+	struct nfs_fh *		fh;
+	const char *		name;
+	int			len;
+	enum nfs3_ftype		type;
+	struct iattr *		sattr;
+	dev_t			rdev;
 };
 
 struct nfs3_renameargs {
 	struct nfs_fh *		fromfh;
 	const char *		fromname;
+	int			fromlen;
 	struct nfs_fh *		tofh;
 	const char *		toname;
+	int			tolen;
 };
 
 struct nfs3_linkargs {
 	struct nfs_fh *		fromfh;
 	struct nfs_fh *		tofh;
 	const char *		toname;
-};
-
-struct nfs3_symlinkargs {
-	struct nfs_fh *		fromfh;
-	const char *		fromname;
-	const char *		topath;
-	struct nfs_sattr *	sattr;
+	int			tolen;
 };
 
 struct nfs3_readdirargs {
 	struct nfs_fh *		fh;
 	__u32			cookie;
+	__u32			verf[2];
 	void *			buffer;
 	unsigned int		bufsiz;
+	int			plus;
 };
 
-struct nfs3_diropok {
+struct nfs3_diropres {
+	struct nfs_fattr *	dir_attr;
 	struct nfs_fh *		fh;
 	struct nfs_fattr *	fattr;
 };
 
-struct nfs3_readres {
+struct nfs3_accessres {
 	struct nfs_fattr *	fattr;
-	unsigned int		count;
+	__u32			access;
 };
 
-struct nfs3_readlinkres {
-	char **			string;
-	unsigned int *		lenp;
-	unsigned int		maxlen;
+struct nfs3_readlinkargs {
+	struct nfs_fh *		fh;
 	void *			buffer;
+	unsigned int		bufsiz;
 };
 
-struct nfs3_readdirres {
+struct nfs3_readlinkres {
+	struct nfs_fattr *	fattr;
 	void *			buffer;
 	unsigned int		bufsiz;
 };
 
-/*
- * The following are for NFSv3
- */
-struct nfs3_fh {
-	__u32			size;
-	__u8			data[NFS3_FHSIZE]
+struct nfs3_renameres {
+	struct nfs_fattr *	fromattr;
+	struct nfs_fattr *	toattr;
 };
 
-struct nfs3_wcc_attr {
-	__u64			size;
-	struct nfs_time		mtime;
-	struct nfs_time		ctime;
+struct nfs3_linkres {
+	struct nfs_fattr *	dir_attr;
+	struct nfs_fattr *	fattr;
+};
+
+struct nfs3_readdirres {
+	struct nfs_fattr *	dir_attr;
+	__u32 *			verf;
+	void *			buffer;
+	unsigned int		bufsiz;
+	int			plus;
 };
 
-#endif /* NFS_NEED_XDR_TYPES */
+#endif /* NFS_NEED_NFS3_XDR_TYPES */
-#endif /* __KERNEL__ */
 
-#endif
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_NFS3_H */
--- linux/include/linux/nfs_cluster.h.nfsattack-gafton	Fri Feb  4 23:26:34 2000
+++ linux/include/linux/nfs_cluster.h	Fri Feb  4 23:26:34 2000
@@ -0,0 +1,175 @@
+#ifndef NFS_CLUSTER_H
+#define NFS_CLUSTER_H
+
+
+/*
+ * Valid flags for a dirty buffer
+ */
+#define PG_UNLOCK_AFTER         0x0001
+#define PG_CANCELLED            0x0002
+#define PG_INVALIDATE_AFTER     0x0004
+
+#define NFS_WRITEBACK_DELAY	(5*HZ)
+#define NFS_WRITEBACK_LOCKDELAY	(60*HZ)
+#define NFS_COMMIT_DELAY	(5*HZ)
+
+#ifdef __KERNEL__
+#include <linux/nfs_fs_sb.h>
+/*
+ * This is the struct where the WRITE/COMMIT arguments go.
+ */
+struct nfs_write_data {
+	struct rpc_task		task;
+	unsigned short		index;		/* request index */
+	unsigned short		count;		/* # of coalesced pages */
+	struct nfs_writeargs	args;		/* argument struct */
+	struct nfs_writeres	res;		/* result struct */
+	struct nfs_fattr	fattr;
+	struct nfs_writeverf	verf;
+	unsigned int		commit;		/* commit id */
+};
+
+struct nfs_page {
+	struct page		*page;
+	unsigned short		flags;
+	struct nfs_cluster	*cluster;	/* Back pointer to cluster */
+	__u64			start;
+	__u64			end;
+	unsigned long		timeout;	/* when to write/commit */
+	struct	nfs_write_data	wdata;		/* rpc call info */
+};
+
+
+typedef int (*nfs_flush_req)(struct nfs_page *, int);
+
+
+/*
+ * Counters of total number and pending number of requests.
+ * When the total number of requests exceeds the soft limit, we start
+ * flushing out requests. If it exceeds the hard limit, we stall until
+ * it drops again.
+ */
+#define MAX_REQUEST_SOFT        128
+#define MAX_REQUEST_HARD        4096
+
+/*
+ * Maximum number of requests per write cluster.
+ * 32 requests per cluster account for 128K of data on an intel box.
+ * Note: it's a good idea to make this number smaller than MAX_REQUEST_SOFT.
+ *
+ * For 100Mbps Ethernet, 128 pages (i.e. 512K) per cluster gives much
+ * better performance.
+ */
+#define CLUSTER_SHIFT           7
+#define CLUSTER_PAGES           (1 << CLUSTER_SHIFT)
+#define CLUSTER_SIZE            (CLUSTER_PAGES * PAGE_CACHE_SIZE)
+#define CLUSTER_MASK            (~(CLUSTER_SIZE - 1))
+#define CLUSTER_NR(off)         ((off) >> (CLUSTER_SHIFT + PAGE_CACHE_SHIFT))
+#define CLUSTER_HASH(ino, off)  (((ino) ^ CLUSTER_NR(off)) & (CLUSTER_HASH_SIZE - 1))
+#define REQUEST_NR(off)         (((off) >> PAGE_CACHE_SHIFT) & (CLUSTER_PAGES - 1))
+
+
+/*
+ * This is a cluster of read/write requests that should be sent to
+ * the server together.
+ *
+ * Note that there may be more than one cluster for the same file
+ * region, but with different credentials and pid.
+ */
+struct nfs_cluster {
+	struct rpc_listitem	cl_list;	/* list of clusters per inode */
+	struct nfs_cluster	*hash_prev;	/* hash chains */
+	struct nfs_cluster	*hash_next;
+
+	/* List of requests */
+	struct nfs_page		*request[CLUSTER_PAGES];
+
+	struct file		*file;		/* file */
+	__u64			start;		/* file range covered */
+	__u64			end;
+	unsigned long		nextscan;	/* when to flush aged bufs */
+	unsigned short		count;		/* # of references */
+	unsigned short		pages;		/* # of requests total */
+	unsigned short		dirty;		/* # of dirty requests */
+	unsigned short		pending;	/* # of pending WRITE calls */
+
+	unsigned short		sequence;	/* next commit sequence # */
+	unsigned short		committing;	/* pending commit sequence # */
+	struct nfs_write_data	wdata;		/* Commit rpc call info */
+#define MAX_COMMIT_SEQ		0xFFFF
+#define IS_DIRTY(req)		((req)->wdata.commit == MAX_COMMIT_SEQ)
+	struct wait_queue	*wait;
+};
+
+
+#define CL_NEXT(c)	((struct nfs_cluster *)((c)->cl_list.next))
+
+/*
+ * Functions
+ */
+extern int		nfs_flush_wback(struct nfs_page *, int);
+extern int		sync_cluster(struct nfs_cluster *, int);
+extern int		scan_cluster(struct nfs_cluster *, unsigned int);
+extern int		wait_cluster(struct nfs_cluster *);
+extern void		delete_cluster(struct nfs_cluster *);
+
+extern int		nfs_strategy(struct nfs_cluster *);
+
+extern int		commit_cluster(struct nfs_cluster *, int);
+
+static inline void
+release_cluster(struct nfs_cluster *cluster)
+{
+	if (cluster && --(cluster->count) == 0)
+		delete_cluster(cluster);
+}
+
+/* Request functions */
+extern void		cluster_schedule_scan(struct nfs_cluster *,
+					      unsigned long);
+extern void		nfs_release_request(struct nfs_page *);
+extern int		nfs_update_request(struct file *,
+					   struct page *,
+					   __u64, __u64,
+					   struct nfs_page **);
+
+/* Blech: global read/write lock */
+extern rwlock_t nfs_wreq_lock;
+
+/*
+ * Mark a request dirty.
+ */
+static inline void
+nfs_mark_request_dirty(struct nfs_cluster *cluster, struct nfs_page *req)
+{
+	if (req->wdata.commit != MAX_COMMIT_SEQ) {
+		cluster->dirty++;
+		req->wdata.commit = MAX_COMMIT_SEQ;
+	}
+}
+
+/*
+ * Lock the page of an asynchronous request
+ */
+static inline int
+nfs_lock_page(struct nfs_page *req)
+{
+        struct page *page = req->page;
+
+        if (test_and_set_bit(PG_locked, &page->flags))
+		return 0;
+        req->flags |= PG_UNLOCK_AFTER;
+        return 1;
+}
+
+static inline void
+nfs_unlock_page(struct page *page)
+{
+	clear_bit(PG_locked, &page->flags);
+	wake_up(&page->wait);
+}
+#endif
+
+
+
+#endif
--- linux/include/linux/nfs_fs.h.nfsattack-gafton	Tue May 11 13:35:46 1999
+++ linux/include/linux/nfs_fs.h	Fri Feb  4 23:26:34 2000
@@ -9,13 +9,18 @@
 #ifndef _LINUX_NFS_FS_H
 #define _LINUX_NFS_FS_H
 
+#ifdef __KERNEL__
 #include <linux/signal.h>
 #include <linux/sched.h>
+#include <linux/kernel.h>
 #include <linux/in.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
 
 #include <linux/sunrpc/sched.h>
+#endif /* __KERNEL__ */
+
 #include <linux/nfs.h>
-#include <linux/nfs_mount.h>
 
 /*
  * Enable debugging support for nfs client.
@@ -25,16 +30,7 @@
 # define NFS_DEBUG
 #endif
 
-/*
- * NFS_MAX_DIRCACHE controls the number of simultaneously cached
- * directory chunks. Each chunk holds the list of nfs_entry's returned
- * in a single readdir call in a memory region of size PAGE_SIZE.
- *
- * Note that at most server->rsize bytes of the cache memory are used.
- */
-#define NFS_MAX_DIRCACHE		16
-
-#define NFS_MAX_FILE_IO_BUFFER_SIZE	16384
+#define NFS_MAX_FILE_IO_BUFFER_SIZE	32768
 #define NFS_DEF_FILE_IO_BUFFER_SIZE	4096
 
 /*
@@ -43,30 +39,31 @@
 #define NFS_MAX_RPC_TIMEOUT		(6*HZ)
 
 /*
- * Size of the lookup cache in units of number of entries cached.
- * It is better not to make this too large although the optimum
- * depends on a usage and environment.
- */
-#define NFS_LOOKUP_CACHE_SIZE		64
-
-/*
  * superblock magic number for NFS
  */
 #define NFS_SUPER_MAGIC			0x6969
 
-#define NFS_FH(dentry)			((struct nfs_fh *) ((dentry)->d_fsdata))
+#ifdef __KERNEL__
+/*
+ * Convenience macros
+ */
+#define NFS_FH(dentry)			(&((struct nfs_dentry *) ((dentry)->d_fsdata))->fh)
 #define NFS_DSERVER(dentry)		(&(dentry)->d_sb->u.nfs_sb.s_server)
 #define NFS_SERVER(inode)		(&(inode)->i_sb->u.nfs_sb.s_server)
 #define NFS_CLIENT(inode)		(NFS_SERVER(inode)->client)
+#define NFS_PROTO(inode)		(NFS_SERVER(inode)->rpc_ops)
+#define NFS_REQUESTLIST(inode)		(NFS_SERVER(inode)->rw_requests)
 #define NFS_ADDR(inode)			(RPC_PEERADDR(NFS_CLIENT(inode)))
 #define NFS_CONGESTED(inode)		(RPC_CONGESTED(NFS_CLIENT(inode)))
-
+#define NFS_COOKIEVERF(inode)		((inode)->u.nfs_i.cookieverf)
 #define NFS_READTIME(inode)		((inode)->u.nfs_i.read_cache_jiffies)
-#define NFS_OLDMTIME(inode)		((inode)->u.nfs_i.read_cache_mtime)
+#define NFS_CACHE_CTIME(inode)		((inode)->u.nfs_i.read_cache_ctime)
+#define NFS_CACHE_MTIME(inode)		((inode)->u.nfs_i.read_cache_mtime)
+#define NFS_CACHE_ATIME(inode)		((inode)->u.nfs_i.read_cache_atime)
+#define NFS_CACHE_ISIZE(inode)		((inode)->u.nfs_i.read_cache_isize)
 #define NFS_CACHEINV(inode) \
 do { \
-	NFS_READTIME(inode) = jiffies - 1000000; \
-	NFS_OLDMTIME(inode) = 0; \
+	NFS_READTIME(inode) = jiffies - NFS_MAXATTRTIMEO(inode) - 1; \
 } while (0)
 #define NFS_ATTRTIMEO(inode)		((inode)->u.nfs_i.attrtimeo)
 #define NFS_MINATTRTIMEO(inode) \
@@ -75,10 +72,18 @@
 #define NFS_MAXATTRTIMEO(inode) \
 	(S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmax \
 			       : NFS_SERVER(inode)->acregmax)
+#define NFS_ATTRTIMEO_UPDATE(inode)	((inode)->u.nfs_i.attrtimeo_timestamp)
 
 #define NFS_FLAGS(inode)		((inode)->u.nfs_i.flags)
-#define NFS_REVALIDATING(inode)		(NFS_FLAGS(inode) & NFS_INO_REVALIDATE)
-#define NFS_WRITEBACK(inode)		((inode)->u.nfs_i.writeback)
+#define NFS_REVALIDATING(inode)		(NFS_FLAGS(inode) & NFS_INO_REVALIDATING)
+#define NFS_CLUSTERS(inode)		((inode)->u.nfs_i.dirty)
+
+#define NFS_FILEID(inode)	((inode)->u.nfs_i.fileid)
+#define NFS_FSID(inode)		((inode)->u.nfs_i.fsid)
+
+#define NFS_FILE(filp)		((struct nfs_file *)(filp)->private_data)
+#define NFS_FILE_READTIME(filp)	(NFS_FILE(filp)->mtime)
+#define NFS_DENTRY(dentry)	((struct nfs_dentry*)(dentry)->d_fsdata)
 
 /*
  * These are the default flags for swap requests
@@ -88,116 +93,151 @@
 /* Flags in the RPC client structure */
 #define NFS_CLNTF_BUFSIZE	0x0001	/* readdir buffer in longwords */
 
-#ifdef __KERNEL__
+#define NFS_RW_SYNC		0x0001	/* O_SYNC handling */
+#define NFS_RW_SWAP		0x0002	/* This is a swap request */
+
+/*
+ * When flushing a cluster of dirty pages, there can be different
+ * strategies:
+ */
+#define FLUSH_AGING             0       /* only flush old buffers */
+#define FLUSH_SYNC              1       /* file being synced, or contention */
+#define FLUSH_INVALIDATE        2       /* pages will be invalidated */
+#define FLUSH_WAIT              4       /* wait for completion */
+#define FLUSH_STABLE            8       /* commit to stable storage */
+
+
+/*
+ * Structure for dentry->d_fsdata;
+ */
+struct nfs_dentry {
+	struct nfs_fh		fh;		/* File handle */
+	struct rpc_cred*	cred;		/* RPC Credentials */
+};
 
 /*
- * This struct describes a file region to be written.
- * It's kind of a pity we have to keep all these lists ourselves, rather
- * than sticking an extra pointer into struct page.
- */
-struct nfs_wreq {
-	struct rpc_listitem	wb_list;	/* linked list of req's */
-	struct rpc_task		wb_task;	/* RPC task */
-	struct file *		wb_file;	/* dentry referenced */
-	struct page *		wb_page;	/* page to be written */
-	struct wait_queue *	wb_wait;	/* wait for completion */
-	unsigned int		wb_offset;	/* offset within page */
-	unsigned int		wb_bytes;	/* dirty range */
-	unsigned int		wb_count;	/* user count */
-	int			wb_status;
-	pid_t			wb_pid;		/* owner process */
-	unsigned short		wb_flags;	/* status flags */
+ * Structure for file->private_data;
+ */
+struct nfs_file {
+	struct rpc_cred*	cred;		/* RPC Credentials */
+	__u64			mtime;		/* Time of last access */
+};
 
-	struct nfs_writeargs	wb_args;	/* NFS RPC stuff */
-	struct nfs_fattr	wb_fattr;	/* file attributes */
+
+/*
+ * Argument struct for decode_entry function
+ */
+struct nfs_entry {
+	struct page *		page;
+	__u64			ino;
+	__u64			cookie;
+	const char *		name;
+	unsigned int		len;
+	int			eof;
+	struct nfs_fh		fh;
+	struct nfs_fattr	fattr;
+	unsigned long		offset,
+				prev;
 };
 
-#define WB_NEXT(req)		((struct nfs_wreq *) ((req)->wb_list.next))
+/*
+ * RPC procedure vector for NFSv2/NFSv3 demuxing
+ */
+struct nfs_rpc_ops {
+	int	version;		/* Protocol version */
+
+	int	(*getroot)(struct nfs_server *,
+			struct nfs_fh *, struct nfs_fattr *);
+	int	(*getattr)(struct dentry *, struct nfs_fattr *);
+	int	(*setattr)(struct dentry *, struct nfs_fattr *, struct iattr *);
+	int	(*lookup)(struct dentry *, struct nfs_fattr *, struct qstr *,
+			struct nfs_fh *, struct nfs_fattr *);
+	int	(*access)(struct dentry *, int fmode, struct nfs_fattr *);
+	int	(*readlink)(struct dentry *, struct nfs_fattr *,
+			void *buffer, unsigned int buflen);
+	int	(*read)(struct dentry *, struct nfs_fattr *,
+			struct rpc_cred *,
+			int flags, unsigned long offset,
+			unsigned int count, void *buffer, int *eofp);
+	int	(*write)(struct dentry *, struct nfs_fattr *,
+			struct rpc_cred *,
+			int flags, unsigned long offset,
+			unsigned int count, void *buffer,
+			struct nfs_writeverf *verfp);
+	int	(*commit)(struct dentry *, struct nfs_fattr *,
+			struct rpc_cred *,
+			unsigned long, unsigned int);
+	int	(*create)(struct dentry *, struct nfs_fattr *,
+			struct qstr *, struct iattr *, int flags,
+			struct nfs_fh *, struct nfs_fattr *);
+	int	(*remove)(struct dentry *, struct nfs_fattr *,
+			  struct qstr *, struct rpc_cred *);
+	int	(*rename)(struct dentry *, struct nfs_fattr *, struct qstr *,
+			struct dentry *, struct nfs_fattr *, struct qstr *);
+	int	(*link)(struct dentry *, struct nfs_fattr *,
+			struct dentry *, struct nfs_fattr *, struct qstr *);
+	int	(*symlink)(struct dentry *, struct nfs_fattr *, struct qstr *,
+			struct qstr *, struct iattr *,
+			struct nfs_fh *, struct nfs_fattr *);
+	int	(*mkdir)(struct dentry *, struct nfs_fattr *, struct qstr *,
+			struct iattr *, struct nfs_fh *, struct nfs_fattr *);
+	int	(*rmdir)(struct dentry *, struct nfs_fattr *, struct qstr *);
+	int	(*readdir)(struct dentry *, struct nfs_fattr *,
+			struct rpc_cred *,
+			u64 cookie, void *, unsigned int size, int plus);
+	int	(*mknod)(struct dentry *, struct nfs_fattr *, struct qstr *,
+			struct iattr *, dev_t,
+			struct nfs_fh *, struct nfs_fattr *);
+	int	(*statfs)(struct nfs_server *, struct nfs_fh *,
+			struct nfs_fsinfo *);
+	u32 *	(*decode_dirent)(u32 *, struct nfs_entry *, int plus);
+};
 
 /*
- * Various flags for wb_flags
+ * 	NFS_CALL(getattr, inode, (fattr));
+ * expands into
+ *	NFS_PROTO(inode)->getattr(fattr);
  */
-#define NFS_WRITE_CANCELLED	0x0004	/* has been cancelled */
-#define NFS_WRITE_UNCOMMITTED	0x0008	/* written but uncommitted (NFSv3) */
-#define NFS_WRITE_INVALIDATE	0x0010	/* invalidate after write */
-#define NFS_WRITE_INPROGRESS	0x0100	/* RPC call in progress */
-#define NFS_WRITE_COMPLETE	0x0200	/* RPC call completed */
-
-#define WB_CANCELLED(req)	((req)->wb_flags & NFS_WRITE_CANCELLED)
-#define WB_UNCOMMITTED(req)	((req)->wb_flags & NFS_WRITE_UNCOMMITTED)
-#define WB_INVALIDATE(req)	((req)->wb_flags & NFS_WRITE_INVALIDATE)
-#define WB_INPROGRESS(req)	((req)->wb_flags & NFS_WRITE_INPROGRESS)
-#define WB_COMPLETE(req)	((req)->wb_flags & NFS_WRITE_COMPLETE)
-
-/*
- * linux/fs/nfs/proc.c
- */
-extern int nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
-			struct nfs_fattr *fattr);
-extern int nfs_proc_setattr(struct nfs_server *server, struct nfs_fh *fhandle,
-			struct nfs_sattr *sattr, struct nfs_fattr *fattr);
-extern int nfs_proc_lookup(struct nfs_server *server, struct nfs_fh *dir,
-			const char *name, struct nfs_fh *fhandle,
-			struct nfs_fattr *fattr);
-extern int nfs_proc_readlink(struct nfs_server *server, struct nfs_fh *fhandle,
-			void **p0, char **string, unsigned int *len,
-			unsigned int maxlen);
-extern int nfs_proc_read(struct nfs_server *server, struct nfs_fh *fhandle,
-			int swap, unsigned long offset, unsigned int count,
-			void *buffer, struct nfs_fattr *fattr);
-extern int nfs_proc_write(struct nfs_server *server, struct nfs_fh *fhandle,
-			int swap, unsigned long offset, unsigned int count,
-			const void *buffer, struct nfs_fattr *fattr);
-extern int nfs_proc_create(struct nfs_server *server, struct nfs_fh *dir,
-			const char *name, struct nfs_sattr *sattr,
-			struct nfs_fh *fhandle, struct nfs_fattr *fattr);
-extern int nfs_proc_remove(struct nfs_server *server, struct nfs_fh *dir,
-			const char *name);
-extern int nfs_proc_rename(struct nfs_server *server,
-			struct nfs_fh *old_dir, const char *old_name,
-			struct nfs_fh *new_dir, const char *new_name);
-extern int nfs_proc_link(struct nfs_server *server, struct nfs_fh *fhandle,
-			struct nfs_fh *dir, const char *name);
-extern int nfs_proc_symlink(struct nfs_server *server, struct nfs_fh *dir,
-			const char *name, const char *path,
-			struct nfs_sattr *sattr);
-extern int nfs_proc_mkdir(struct nfs_server *server, struct nfs_fh *dir,
-			const char *name, struct nfs_sattr *sattr,
-			struct nfs_fh *fhandle, struct nfs_fattr *fattr);
-extern int nfs_proc_rmdir(struct nfs_server *server, struct nfs_fh *dir,
-			const char *name);
-extern int nfs_proc_readdir(struct nfs_server *server, struct nfs_fh *fhandle,
-			u32 cookie, unsigned int size, __u32 *entry);
-extern int nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
-			struct nfs_fsinfo *res);
+#define NFS_CALL(op, inode, args)	NFS_PROTO(inode)->op args
+
 
+/*
+ * Function vectors etc. for the NFS client
+ */
+extern struct nfs_rpc_ops	nfs_v2_clientops;
+extern struct nfs_rpc_ops	nfs_v3_clientops;
+extern struct rpc_version	nfs_version2;
+extern struct rpc_version	nfs_version3;
+extern struct rpc_program	nfs_program;
 
 /*
  * linux/fs/nfs/inode.c
  */
-extern struct super_block *nfs_read_super(struct super_block *, void *, int);
-extern int init_nfs_fs(void);
 extern struct inode *nfs_fhget(struct dentry *, struct nfs_fh *,
-				struct nfs_fattr *);
-extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
-extern int nfs_revalidate(struct dentry *);
-extern int nfs_open(struct inode *, struct file *);
-extern int nfs_release(struct inode *, struct file *);
-extern int _nfs_revalidate_inode(struct nfs_server *, struct dentry *);
+			       struct nfs_fattr *);
+extern struct super_block *nfs_read_super(struct super_block *, void *, int);
+extern int	init_nfs_fs(void);
+extern int	nfs_zap_caches(struct inode *);
+extern int	nfs_revalidate(struct dentry *);
+extern int	__nfs_revalidate_inode(struct dentry *);
+extern int	nfs_refresh_inode(struct inode *, struct nfs_fattr *);
+extern int	nfs_wait_on_inode(struct inode *, int flag);
+extern void	nfs_unlock_inode(struct inode *);
+extern int	nfs_update_atime(struct dentry *);
+
 
 /*
  * linux/fs/nfs/file.c
  */
 extern struct inode_operations nfs_file_inode_operations;
+extern struct nfs_file *nfs_file_alloc(void);
+extern void nfs_file_free(struct nfs_file *);
 
 /*
  * linux/fs/nfs/dir.c
  */
 extern struct inode_operations nfs_dir_inode_operations;
 extern struct dentry_operations nfs_dentry_operations;
-extern void nfs_free_dircache(void);
-extern void nfs_invalidate_dircache(struct inode *);
-extern void nfs_invalidate_dircache_sb(struct super_block *);
 
 /*
  * linux/fs/nfs/symlink.c
@@ -213,50 +253,177 @@
  * linux/fs/nfs/write.c
  */
 extern int  nfs_writepage(struct file *, struct page *);
-extern int  nfs_check_failed_request(struct inode *);
+extern int  nfs_updatepage(struct file *, struct page *, const char *,
+                        unsigned long, unsigned int, int);
+
 
 /*
  * Try to write back everything synchronously (but check the
  * return value!)
  */
-extern int  nfs_wb_all(struct inode *);
-extern int  nfs_wb_page(struct inode *, struct page *);
-extern int  nfs_wb_file(struct inode *, struct file *);
+extern int  nfs_sync_file(struct inode *, struct file *, __u64, __u64, int);
+
+static inline int
+nfs_wb_all(struct inode *inode)
+{
+	int error = nfs_sync_file(inode, NULL, 0, NFS_OFFSET_MAX, FLUSH_WAIT);
+	return (error < 0) ? error : 0;
+}
+
+/*
+ * Write back all requests on one page - we do this before reading it.
+ */
+static inline int
+nfs_wb_page(struct inode *inode, struct page* page)
+{
+	int error = nfs_sync_file(inode, NULL, page->offset, page->offset + PAGE_CACHE_SIZE, FLUSH_WAIT);
+	return (error < 0) ? error : 0;
+}
+
+
+/*
+ * Write back all pending writes for one user.
+ */
+static inline int
+nfs_wb_file(struct inode *inode, struct file *file)
+{
+	int error = nfs_sync_file(inode, file, 0, NFS_OFFSET_MAX, FLUSH_WAIT);
+	return (error < 0) ? error : 0;
+}
 
 /*
  * Invalidate write-backs, possibly trying to write them
  * back first..
  */
-extern void nfs_inval(struct inode *);
-extern int  nfs_updatepage(struct file *, struct page *, unsigned long, unsigned int, int);
+static inline int
+nfs_invalidate_pages(struct inode *inode)
+{
+	return nfs_sync_file(inode, NULL, 0, NFS_OFFSET_MAX, FLUSH_INVALIDATE|FLUSH_STABLE);
+}
 
 /*
  * linux/fs/nfs/read.c
  */
 extern int  nfs_readpage(struct file *, struct page *);
+extern struct page *  nfs_find_one_page(struct inode *, unsigned long);
+
+static __inline__
+struct page *nfs_find_lock_page(struct inode * inode, unsigned long offset)
+{
+	struct page *page = nfs_find_one_page(inode, offset);
+
+	if (!page)
+		return NULL;
+	while(test_and_set_bit(PG_locked, &page->flags))
+		wait_on_page(page);
+	return page;
+}
+
+
+/*
+ * linux/fs/nfs/nfs2xdr.c
+ */
+extern u32 *nfs_decode_dirent(u32 *, struct nfs_entry *, int);
+
+/*
+ * linux/fs/nfs/nfs3xdr.c
+ */
+extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
 
 /*
  * linux/fs/mount_clnt.c
  * (Used only by nfsroot module)
  */
 extern int  nfs_mount(struct sockaddr_in *, char *, struct nfs_fh *);
+extern int  nfs3_mount(struct sockaddr_in *, char *, struct nfs_fh *);
 
 /*
  * inline functions
  */
 static inline int
-nfs_revalidate_inode(struct nfs_server *server, struct dentry *dentry)
+nfs_revalidate_inode(struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
-	if (jiffies - NFS_READTIME(inode) < NFS_ATTRTIMEO(inode))
+	if (time_before(jiffies,NFS_READTIME(inode)+NFS_ATTRTIMEO(inode)))
 		return 0;
-	return _nfs_revalidate_inode(server, dentry);
+	return __nfs_revalidate_inode(dentry);
 }
 
+/*
+ * Sync a file or file region.
+ *
+ * We don't distinguish between pages written by the current process or
+ * someone else, because people may do weird things like
+ * write a lot, fork, and fsync() in the child.
+ */
+static inline int
+nfs_flush_dirty_pages(struct inode *inode , struct file *file, loff_t start, loff_t end)
+{
+	return nfs_sync_file(inode, file, start, end, FLUSH_WAIT);
+}
+
+static inline off_t
+nfs_size_to_off_t(__u64 size)
+{
+	return (size > (__u64)LONG_MAX) ? (off_t)LONG_MAX : (off_t) size;
+}
+
+static inline unsigned long
+nfs_fileid_to_ino_t(u64 fileid)
+{
+	unsigned long ino = (unsigned long) fileid;
+	if (sizeof(unsigned long) < sizeof(u64))
+		ino ^= fileid >> (sizeof(u64)-sizeof(unsigned long)) * 8;
+	return ino;
+}
+
+static inline time_t
+nfs_time_to_secs(__u64 time)
+{
+	return (time_t)(time >> 32);
+}
+
+static __inline__ struct rpc_cred *
+nfs_file_cred(struct file *filp)
+{
+	if (!NFS_FILE(filp)) {
+		printk("nfs_file_cred: invalid file!\n");
+		return NULL;
+	}
+	return NFS_FILE(filp)->cred;
+}
+
+static __inline__ struct rpc_cred *
+nfs_dentry_cred(struct dentry *dentry)
+{
+
+	if (!NFS_DENTRY(dentry))
+		return NULL;
+	return NFS_DENTRY(dentry)->cred;
+}
+
+
+/* NFS cluster */
+extern int               nfs_wait_on_congest(struct inode *);
+extern int		 nfs_wait_on_page(struct inode *, struct page *);
+
 /* NFS root */
 
 extern int nfs_root_mount(struct super_block *sb);
 
+#define nfs_wait_event(clnt, wq, condition)				\
+({									\
+	int __retval = 0;						\
+	if ((clnt)->cl_intr) {						\
+		sigset_t oldmask;					\
+		rpc_clnt_sigmask(clnt, &oldmask);			\
+		__retval = wait_event_interruptible(wq, condition);	\
+		rpc_clnt_sigunmask(clnt, &oldmask);			\
+	} else								\
+		wait_event(wq, condition);				\
+	__retval;							\
+})
+
 #endif /* __KERNEL__ */
 
 /*
@@ -270,6 +437,7 @@
 #define NFSDBG_XDR		0x0020
 #define NFSDBG_FILE		0x0040
 #define NFSDBG_ROOT		0x0080
+#define NFSDBG_PARANOID		0x0100
 #define NFSDBG_ALL		0xFFFF
 
 #ifdef __KERNEL__
@@ -278,6 +446,11 @@
 #  define ifdebug(fac)		if (nfs_debug & NFSDBG_##fac)
 # else
 #  define ifdebug(fac)		if (0)
+# endif
+# ifdef NFS_PARANOIA
+#  define nfsparanoid(args...)	dfprintk(PARANOID, ##args)
+# else
+#  define nfsparanoid(args...)	do { } while (0)
 # endif
 #endif /* __KERNEL */
 
--- linux/include/linux/nfs_fs_i.h.nfsattack-gafton	Sat Mar  6 18:04:35 1999
+++ linux/include/linux/nfs_fs_i.h	Fri Feb  4 23:26:34 2000
@@ -1,4 +1,4 @@
-#ifndef _NFS_FS_I
+#ifndef _NFS_FS_I
 #define _NFS_FS_I
 
 #include <linux/nfs.h>
@@ -16,6 +16,13 @@
 	struct pipe_inode_info	pipeinfo;
 
 	/*
+	 * The 64bit fileid
+	 */
+	__u64 fsid;
+	__u64 fileid;
+	struct list_head i_alias64;
+
+	/*
 	 * Various flags
 	 */
 	unsigned short		flags;
@@ -38,22 +45,55 @@
 	 *	mtime != read_cache_mtime
 	 */
 	unsigned long		read_cache_jiffies;
-	unsigned long		read_cache_mtime;
+	__u64			read_cache_ctime;
+	__u64			read_cache_mtime;
+	__u64			read_cache_atime;
+	__u64			read_cache_isize;
 	unsigned long		attrtimeo;
+	unsigned long		attrtimeo_timestamp;
+
+	/*
+	 * This is the cookie verifier used for NFSv3 readdir
+	 * operations
+	 */
+	__u32			cookieverf[2];
 
 	/*
-	 * This is the list of dirty unwritten pages.
-	 * NFSv3 will want to add a list for written but uncommitted
-	 * pages.
+	 * This is the list of clusters of dirty pages.
 	 */
-	struct nfs_wreq *	writeback;
+	struct nfs_cluster *	dirty;
 };
 
 /*
  * Legal inode flag values
  */
-#define NFS_INO_REVALIDATE	0x0001		/* revalidating attrs */
-#define NFS_IS_SNAPSHOT		0x0010		/* a snapshot file */
+#define NFS_INO_LOCKED          0x0001          /* locked for revalidation */
+#define NFS_INO_ACCESSOK        0x0002          /* have called ACCESS */
+#define NFS_INO_WRITE_ERROR     0x0004          /* a write error occurred */
+#define NFS_INO_ADVISE_RDPLUS   0x0008          /* advise readdirplus */
+#define NFS_INO_REVALIDATING    0x0010          /* in nfs_revalidate() */
+#define NFS_INO_INVALIDATE      0x0020          /* zap cache on next occasion */
+#define NFS_IS_SNAPSHOT		0x0040		/* a snapshot file */
+
+/*
+ * NFS ACL info.
+ * This information will be used by nfs_permission() in the obvious fashion,
+ * but also helps the RPC engine to select whether to try the operation first
+ * with the effective or the real uid/gid.
+ *
+ * For NFSv2, this info is obtained by just trying the operation in
+ * question and updating the ACL info according to the result.
+ * For NFSv3, the access() call is used to fill in the permission bits.
+ *
+ * Not yet used.
+ */
+struct nfs_acl_info {
+	struct nfs_acl_info *	acl_next;
+	unsigned long		acl_read_time;
+	uid_t			acl_uid;
+	gid_t			acl_gid;
+	unsigned int		acl_bits;
+};
 
 /*
  * NFS lock info
@@ -61,6 +101,7 @@
 struct nfs_lock_info {
 	u32		state;
 	u32		flags;
+	struct nlm_host	*host;
 };
 
 /*
--- linux/include/linux/nfs_fs_sb.h.nfsattack-gafton	Tue May 11 13:35:43 1999
+++ linux/include/linux/nfs_fs_sb.h	Fri Feb  4 23:26:34 2000
@@ -1,31 +1,45 @@
 #ifndef _NFS_FS_SB
 #define _NFS_FS_SB
 
-#include <linux/nfs.h>
 #include <linux/in.h>
 
+#define CLUSTER_HASH_SIZE       16
+
 /*
  * NFS client parameters stored in the superblock.
  */
 struct nfs_server {
 	struct rpc_clnt *	client;		/* RPC client handle */
+	struct nfs_rpc_ops *	rpc_ops;	/* NFS protocol vector */
 	int			flags;		/* various flags */
-	int			rsize;		/* read size */
-	int			wsize;		/* write size */
+	unsigned int		rsize;		/* read size */
+	unsigned int		rpages;		/* read size (in pages) */
+	unsigned int		wsize;		/* write size */
+	unsigned int		wpages;		/* write size (in pages) */
+	unsigned int		dtsize;		/* readdir size */
 	unsigned int		bsize;		/* server block size */
 	unsigned int		acregmin;	/* attr cache timeouts */
 	unsigned int		acregmax;
 	unsigned int		acdirmin;
 	unsigned int		acdirmax;
+	unsigned int		namelen;
 	char *			hostname;	/* remote hostname */
+	struct nfs_dircache *	dircache;	/* readdir cache info */
+	struct nfs_reqlist *	rw_requests;    /* async read/write requests */
 };
 
+
 /*
  * nfs super-block data in memory
  */
 struct nfs_sb_info {
-	struct nfs_server	s_server;
-	struct nfs_fh		s_root;
+	struct nfs_server	s_server;	/* NFS server info */
+	unsigned int		s_fhsize;	/* File handle size */
+	struct nfs_fh *		s_root;		/* The root file handle */
 };
+
+/* Server writeback cache */
+int nfs_reqlist_init(struct nfs_server *);
+void nfs_reqlist_exit(struct nfs_server *);
 
 #endif
--- linux/include/linux/nfs_mount.h.nfsattack-gafton	Mon Apr  7 14:35:31 1997
+++ linux/include/linux/nfs_mount.h	Fri Feb  4 23:26:34 2000
@@ -8,6 +8,9 @@
  *
  *  structure passed from user-space to kernel-space during an nfs mount
  */
+#include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs_fs.h>
 
 /*
  * WARNING!  Do not delete or change the order of these fields.  If
@@ -15,13 +18,17 @@
  * tracks which fields are present.  This will ensure some measure of
  * mount-to-kernel version compatibility.  Some of these aren't used yet
  * but here they are anyway.
+ *
+ * Lugging around two file handles is inevitable if we want to pass
+ * version 3 handles while maintaining compatibility. We may want to
+ * break it for the 2.2 kernel eventually...
  */
-#define NFS_MOUNT_VERSION	3
+#define NFS_MOUNT_VERSION	4
 
 struct nfs_mount_data {
 	int		version;		/* 1 */
 	int		fd;			/* 1 */
-	struct nfs_fh	root;			/* 1 */
+	struct nfs2_fh	old_root;		/* 1 */
 	int		flags;			/* 1 */
 	int		rsize;			/* 1 */
 	int		wsize;			/* 1 */
@@ -35,6 +42,7 @@
 	char		hostname[256];		/* 1 */
 	int		namlen;			/* 2 */
 	unsigned int	bsize;			/* 3 */
+	struct nfs_fh	root;			/* 4 */
 };
 
 /* bits in the flags field */
@@ -49,5 +57,17 @@
 #define NFS_MOUNT_VER3		0x0080	/* 3 */
 #define NFS_MOUNT_KERBEROS	0x0100	/* 3 */
 #define NFS_MOUNT_NONLM		0x0200	/* 3 */
+#define NFS_MOUNT_FLAGMASK	0xFFFF
+
+/*
+ * Private flags - not to be set by mount program
+ */
+#ifdef __KERNEL__
+#include <asm/types.h>
+#if BITS_PER_LONG <= 32
+#define NFS_FUDGE_FILEID
+#define NFS_MOUNT_SWAPINUM	0x00010000
+#endif /* BITS_PER_LONG <= 32 */
+#endif /* __KERNEL__ */
  
 #endif
--- linux/include/linux/pagemap.h.nfsattack-gafton	Tue Jan  4 13:12:25 2000
+++ linux/include/linux/pagemap.h	Fri Feb  4 23:26:34 2000
@@ -150,6 +150,18 @@
 		__wait_on_page(page);
 }
 
+static inline void add_to_page_cache(struct page * page,
+	struct inode * inode, unsigned long offset,
+	struct page **hash)
+{
+	atomic_inc(&page->count);
+	page->flags &= ~((1 << PG_uptodate) | (1 << PG_error));
+	page->flags |= (1 << PG_referenced);
+	page->offset = offset;
+	add_page_to_inode_queue(inode, page);
+	__add_page_to_hash_queue(page, hash);
+}
+
 extern void update_vm_cache_conditional(struct inode *, unsigned long, const char *, int, unsigned long);
 extern void update_vm_cache(struct inode *, unsigned long, const char *, int);
 
--- linux/net/sunrpc/auth.c.nfsattack-gafton	Mon Mar 22 11:00:15 1999
+++ linux/net/sunrpc/auth.c	Fri Feb  4 23:26:34 2000
@@ -12,6 +12,7 @@
 #include <linux/errno.h>
 #include <linux/socket.h>
 #include <linux/sunrpc/clnt.h>
+#include <asm/spinlock.h>
 
 #ifdef RPC_DEBUG
 # define RPCDBG_FACILITY	RPCDBG_AUTH
@@ -68,6 +69,8 @@
 	auth->au_ops->destroy(auth);
 }
 
+spinlock_t rpc_credcache_lock = SPIN_LOCK_UNLOCKED;
+
 /*
  * Initialize RPC credential cache
  */
@@ -78,6 +81,15 @@
 	auth->au_nextgc = jiffies + (auth->au_expire >> 1);
 }
 
+static inline void
+rpcauth_crdestroy(struct rpc_auth *auth, struct rpc_cred *cred)
+{
+	if (auth->au_ops->crdestroy)
+		auth->au_ops->crdestroy(cred);
+	else
+		rpc_free(cred);
+}
+
 /*
  * Clear the RPC credential cache
  */
@@ -91,6 +103,7 @@
 	if (!(destroy = auth->au_ops->crdestroy))
 		destroy = (void (*)(struct rpc_cred *)) rpc_free;
 
+	spin_lock(&rpc_credcache_lock);
 	for (i = 0; i < RPC_CREDCACHE_NR; i++) {
 		q = &auth->au_credcache[i];
 		while ((cred = *q) != NULL) {
@@ -98,6 +111,7 @@
 			destroy(cred);
 		}
 	}
+	spin_unlock(&rpc_credcache_lock);
 }
 
 /*
@@ -107,17 +121,15 @@
 rpcauth_gc_credcache(struct rpc_auth *auth)
 {
 	struct rpc_cred	**q, *cred, *free = NULL;
-	int		i, safe = 0;
+	int		i;
 
 	dprintk("RPC: gc'ing RPC credentials for auth %p\n", auth);
+	spin_lock(&rpc_credcache_lock);
 	for (i = 0; i < RPC_CREDCACHE_NR; i++) {
 		q = &auth->au_credcache[i];
 		while ((cred = *q) != NULL) {
-			if (++safe > 500) {
-				printk("RPC: rpcauth_gc_credcache looping!\n");
-				break;
-			}
-			if (!cred->cr_count && time_before(cred->cr_expire, jiffies)) {
+			if (!cred->cr_count &&
+			    time_before(cred->cr_expire, jiffies)) {
 				*q = cred->cr_next;
 				cred->cr_next = free;
 				free = cred;
@@ -126,9 +138,10 @@
 			q = &cred->cr_next;
 		}
 	}
+	spin_unlock(&rpc_credcache_lock);
 	while ((cred = free) != NULL) {
 		free = cred->cr_next;
-		rpc_free(cred);
+		rpcauth_crdestroy(auth, cred);
 	}
 	auth->au_nextgc = jiffies + auth->au_expire;
 }
@@ -136,44 +149,49 @@
 /*
  * Insert credential into cache
  */
-inline void
+void
 rpcauth_insert_credcache(struct rpc_auth *auth, struct rpc_cred *cred)
 {
 	int		nr;
 
 	nr = (cred->cr_uid % RPC_CREDCACHE_NR);
+	spin_lock(&rpc_credcache_lock);
 	cred->cr_next = auth->au_credcache[nr];
 	auth->au_credcache[nr] = cred;
-	cred->cr_expire = jiffies + auth->au_expire;
 	cred->cr_count++;
+	cred->cr_expire = jiffies + auth->au_expire;
+	spin_unlock(&rpc_credcache_lock);
 }
 
 /*
  * Look up a process' credentials in the authentication cache
  */
 static struct rpc_cred *
-rpcauth_lookup_credcache(struct rpc_task *task)
+rpcauth_lookup_credcache(struct rpc_auth *auth, int taskflags)
 {
-	struct rpc_auth	*auth = task->tk_auth;
 	struct rpc_cred	**q, *cred = NULL;
-	int		nr;
+	int		nr = 0;
 
-	nr = RPC_DO_ROOTOVERRIDE(task)? 0 : (current->uid % RPC_CREDCACHE_NR);
+	if (!(taskflags & RPC_TASK_ROOTCREDS))
+		nr = current->uid % RPC_CREDCACHE_NR;
 
 	if (time_before(auth->au_nextgc, jiffies))
 		rpcauth_gc_credcache(auth);
 
+	spin_lock(&rpc_credcache_lock);
 	q = &auth->au_credcache[nr];
 	while ((cred = *q) != NULL) {
-		if (auth->au_ops->crmatch(task, cred)) {
+		if (!(cred->cr_flags & RPCAUTH_CRED_DEAD) &&
+		    auth->au_ops->crmatch(cred, taskflags)) {
 			*q = cred->cr_next;
 			break;
 		}
 		q = &cred->cr_next;
 	}
+	spin_unlock(&rpc_credcache_lock);
 
 	if (!cred)
-		cred = auth->au_ops->crcreate(task);
+		cred = auth->au_ops->crcreate(taskflags);
 
 	if (cred)
 		rpcauth_insert_credcache(auth, cred);
@@ -184,39 +202,53 @@
 /*
  * Remove cred handle from cache
  */
-static inline void
+static void
 rpcauth_remove_credcache(struct rpc_auth *auth, struct rpc_cred *cred)
 {
 	struct rpc_cred	**q, *cr;
 	int		nr;
 
 	nr = (cred->cr_uid % RPC_CREDCACHE_NR);
+	spin_lock(&rpc_credcache_lock);
 	q = &auth->au_credcache[nr];
 	while ((cr = *q) != NULL) {
 		if (cred == cr) {
 			*q = cred->cr_next;
-			return;
+			cred->cr_next = NULL;
+			break;
 		}
-		q = &cred->cr_next;
+		q = &cr->cr_next;	/* step via the current node, not the target */
 	}
+	spin_unlock(&rpc_credcache_lock);
+}
+
+struct rpc_cred *
+rpcauth_lookupcred(struct rpc_auth *auth, int taskflags)
+{
+	dprintk("RPC:     looking up %s cred\n",
+		auth->au_ops->au_name);
+	return rpcauth_lookup_credcache(auth, taskflags);
 }
 
 struct rpc_cred *
-rpcauth_lookupcred(struct rpc_task *task)
+rpcauth_bindcred(struct rpc_task *task)
 {
+	struct rpc_auth *auth = task->tk_auth;
+
 	dprintk("RPC: %4d looking up %s cred\n",
 		task->tk_pid, task->tk_auth->au_ops->au_name);
-	return task->tk_cred = rpcauth_lookup_credcache(task);
+	task->tk_cred = rpcauth_lookup_credcache(auth, task->tk_flags);
+	if (task->tk_cred == 0)
+		task->tk_status = -ENOMEM;
+	return task->tk_cred;
 }
 
 int
-rpcauth_matchcred(struct rpc_task *task, struct rpc_cred *cred)
+rpcauth_matchcred(struct rpc_auth *auth, struct rpc_cred *cred, int taskflags)
 {
-	struct rpc_auth	*auth = task->tk_auth;
-
-	dprintk("RPC: %4d matching %s cred %p\n",
-		task->tk_pid, auth->au_ops->au_name, task->tk_cred);
-	return auth->au_ops->crmatch(task, cred);
+	dprintk("RPC:     matching %s cred %d\n",
+		auth->au_ops->au_name, taskflags);
+	return auth->au_ops->crmatch(cred, taskflags);
 }
 
 void
@@ -224,27 +256,36 @@
 {
 	dprintk("RPC: %4d holding %s cred %p\n",
 		task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_cred);
-	if (task->tk_cred)
+	if (task->tk_cred) {
 		task->tk_cred->cr_count++;
+		task->tk_cred->cr_expire = jiffies + task->tk_auth->au_expire;
+	}
 }
 
 void
-rpcauth_releasecred(struct rpc_task *task)
+rpcauth_releasecred(struct rpc_auth *auth, struct rpc_cred *cred)
 {
-	struct rpc_auth	*auth = task->tk_auth;
-	struct rpc_cred	*cred;
-
-	dprintk("RPC: %4d releasing %s cred %p\n",
-		task->tk_pid, auth->au_ops->au_name, task->tk_cred);
-	if ((cred = task->tk_cred) != NULL) {
+	if (cred != NULL && cred->cr_count > 0) {
 		cred->cr_count--;
 		if (cred->cr_flags & RPCAUTH_CRED_DEAD) {
 			rpcauth_remove_credcache(auth, cred);
 			if (!cred->cr_count)
-				auth->au_ops->crdestroy(cred);
+				rpcauth_crdestroy(auth, cred);
 		}
-		task->tk_cred = NULL;
 	}
+}
+
+void
+rpcauth_unbindcred(struct rpc_task *task)
+{
+	struct rpc_auth	*auth = task->tk_auth;
+	struct rpc_cred	*cred = task->tk_cred;
+
+	dprintk("RPC: %4d releasing %s cred %p\n",
+		task->tk_pid, auth->au_ops->au_name, cred);
+
+	rpcauth_releasecred(auth, cred);
+	task->tk_cred = NULL;
 }
 
 u32 *
--- linux/net/sunrpc/auth_null.c.nfsattack-gafton	Mon Mar 22 11:00:15 1999
+++ linux/net/sunrpc/auth_null.c	Fri Feb  4 23:26:34 2000
@@ -38,6 +38,7 @@
 nul_destroy(struct rpc_auth *auth)
 {
 	dprintk("RPC: destroying NULL authenticator %p\n", auth);
+	rpcauth_free_credcache(auth);
 	rpc_free(auth);
 }
 
@@ -45,15 +46,12 @@
  * Create NULL creds for current process
  */
 static struct rpc_cred *
-nul_create_cred(struct rpc_task *task)
+nul_create_cred(int flags)
 {
 	struct rpc_cred	*cred;
 
-	if (!(cred = (struct rpc_cred *) rpc_malloc(task, sizeof(*cred)))) {
-		task->tk_status = -ENOMEM;
+	if (!(cred = (struct rpc_cred *) rpc_allocate(flags, sizeof(*cred))))
 		return NULL;
-	}
-
 	cred->cr_count = 0;
 	cred->cr_flags = RPCAUTH_CRED_UPTODATE;
 
@@ -73,7 +71,7 @@
  * Match cred handle against current process
  */
 static int
-nul_match(struct rpc_task *task, struct rpc_cred *cred)
+nul_match(struct rpc_cred *cred, int taskflags)
 {
 	return 1;
 }
--- linux/net/sunrpc/auth_unix.c.nfsattack-gafton	Mon Mar 22 11:00:15 1999
+++ linux/net/sunrpc/auth_unix.c	Fri Feb  4 23:26:34 2000
@@ -60,7 +60,7 @@
 }
 
 static struct rpc_cred *
-unx_create_cred(struct rpc_task *task)
+unx_create_cred(int flags)
 {
 	struct unx_cred	*cred;
 	int		i;
@@ -68,14 +68,12 @@
 	dprintk("RPC:      allocating UNIX cred for uid %d gid %d\n",
 				current->uid, current->gid);
 
-	if (!(cred = (struct unx_cred *) rpc_malloc(task, sizeof(*cred)))) {
-		task->tk_status = -ENOMEM;
+	if (!(cred = (struct unx_cred *) rpc_allocate(flags, sizeof(*cred))))
 		return NULL;
-	}
 
 	cred->uc_count = 0;
 	cred->uc_flags = RPCAUTH_CRED_UPTODATE;
-	if (RPC_DO_ROOTOVERRIDE(task)) {
+	if (flags & RPC_TASK_ROOTCREDS) {
 		cred->uc_uid = cred->uc_fsuid = 0;
 		cred->uc_gid = cred->uc_fsgid = 0;
 		cred->uc_gids[0] = NOGROUP;
@@ -131,12 +129,12 @@
  * request root creds (e.g. for NFS swapping).
  */
 static int
-unx_match(struct rpc_task * task, struct rpc_cred *rcred)
+unx_match(struct rpc_cred *rcred, int taskflags)
 {
 	struct unx_cred	*cred = (struct unx_cred *) rcred;
 	int		i;
 
-	if (!RPC_DO_ROOTOVERRIDE(task)) {
+	if (!(taskflags & RPC_TASK_ROOTCREDS)) {
 		int groups;
 
 		if (cred->uc_uid != current->uid
--- linux/net/sunrpc/clnt.c.nfsattack-gafton	Tue Jan  4 13:12:27 2000
+++ linux/net/sunrpc/clnt.c	Fri Feb  4 23:26:34 2000
@@ -29,6 +29,7 @@
 #include <linux/malloc.h>
 #include <linux/in.h>
 #include <linux/utsname.h>
+#include <linux/interrupt.h>
 
 #include <linux/sunrpc/clnt.h>
 
@@ -49,7 +50,6 @@
 static void	call_encode(struct rpc_task *task);
 static void	call_decode(struct rpc_task *task);
 static void	call_transmit(struct rpc_task *task);
-static void	call_receive(struct rpc_task *task);
 static void	call_status(struct rpc_task *task);
 static void	call_refresh(struct rpc_task *task);
 static void	call_refreshresult(struct rpc_task *task);
@@ -76,6 +76,12 @@
 	dprintk("RPC: creating %s client for %s (xprt %p)\n",
 		program->name, servname, xprt);
 
+#ifdef RPC_DEBUG
+	rpc_register_sysctl();
+#endif
+
+	xdr_init();
+
 	if (!xprt)
 		goto out;
 	if (vers >= program->nrvers || !(version = program->version[vers]))
@@ -94,7 +100,7 @@
 	clnt->cl_port     = xprt->addr.sin_port;
 	clnt->cl_prog     = program->number;
 	clnt->cl_vers     = version->number;
-	clnt->cl_prot     = IPPROTO_UDP;
+	clnt->cl_prot     = xprt->prot;
 	clnt->cl_stats    = program->stats;
 	clnt->cl_bindwait = RPC_INIT_WAITQ("bindwait");
 
@@ -136,7 +142,7 @@
 		clnt->cl_protname, clnt->cl_server);
 	while (clnt->cl_users) {
 #ifdef RPC_DEBUG
-		printk("rpc_shutdown_client: client %s, tasks=%d\n",
+		dprintk("RPC: rpc_shutdown_client: client %s, tasks=%d\n",
 			clnt->cl_protname, clnt->cl_users);
 #endif
 		/* Don't let rpc_release_client destroy us */
@@ -195,7 +201,6 @@
 static void
 rpc_default_callback(struct rpc_task *task)
 {
-	rpc_release_task(task);
 }
 
 /*
@@ -236,42 +241,73 @@
 /*
  * New rpc_call implementation
  */
-int
-rpc_do_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp,
-				int flags, rpc_action func, void *data)
+int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 {
 	struct rpc_task	my_task, *task = &my_task;
 	sigset_t	oldset;
-	int		async, status;
+	int		status;
 
 	/* If this client is slain all further I/O fails */
 	if (clnt->cl_dead) 
 		return -EIO;
 
+	if (flags & RPC_TASK_ASYNC) {
+		printk("rpc_call_sync: Illegal flag combination for synchronous task\n");
+		flags &= ~RPC_TASK_ASYNC;
+	}
+
 	rpc_clnt_sigmask(clnt, &oldset);		
 
 	/* Create/initialize a new RPC task */
-	if ((async = (flags & RPC_TASK_ASYNC)) != 0) {
-		if (!func)
-			func = rpc_default_callback;
-		status = -ENOMEM;
-		if (!(task = rpc_new_task(clnt, func, flags)))
-			goto out;
-		task->tk_calldata = data;
-	} else {
-		rpc_init_task(task, clnt, NULL, flags);
+	rpc_init_task(task, clnt, NULL, flags);
+	rpc_call_setup(task, msg, 0);
+
+	/* Execute the task if the call setup succeeded; otherwise release it */
+	if (task->tk_status == 0)
+		status = rpc_execute(task);
+	else {
+		status = task->tk_status;
+		rpc_release_task(task);
 	}
 
-	/* Bind the user cred, set up the call info struct and
-	 * execute the task */
-	if (rpcauth_lookupcred(task) != NULL) {
-		rpc_call_setup(task, proc, argp, resp, 0);
-		rpc_execute(task);
-	} else
-		async = 0;
+	rpc_clnt_sigunmask(clnt, &oldset);		
 
-	status = 0;
-	if (!async) {
+	return status;
+}
+
+/*
+ * New rpc_call implementation (asynchronous variant)
+ */
+int
+rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
+	       rpc_action callback, void *data)
+{
+	struct rpc_task	*task;
+	sigset_t	oldset;
+	int		status;
+
+	/* If this client is slain all further I/O fails */
+	if (clnt->cl_dead) 
+		return -EIO;
+
+	flags |= RPC_TASK_ASYNC;
+
+	rpc_clnt_sigmask(clnt, &oldset);		
+
+	/* Create/initialize a new RPC task */
+	if (!callback)
+		callback = rpc_default_callback;
+	status = -ENOMEM;
+	if (!(task = rpc_new_task(clnt, callback, flags)))
+		goto out;
+	task->tk_calldata = data;
+
+	rpc_call_setup(task, msg, 0);
+
+	/* Execute the task if the call setup succeeded; otherwise release it */
+	if (task->tk_status == 0)
+		status = rpc_execute(task);
+	else {
 		status = task->tk_status;
 		rpc_release_task(task);
 	}
@@ -284,17 +320,27 @@
 
 
 void
-rpc_call_setup(struct rpc_task *task, u32 proc,
-				void *argp, void *resp, int flags)
+rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
 {
-	task->tk_action = call_bind;
-	task->tk_proc   = proc;
-	task->tk_argp   = argp;
-	task->tk_resp   = resp;
+	task->tk_proc   = msg->proc;
+	task->tk_argp   = msg->arg;
+	task->tk_resp   = msg->res;
 	task->tk_flags |= flags;
+	/* Bind the user cred */
+	if (msg->cred != NULL) {
+		task->tk_cred = msg->cred;
+		rpcauth_holdcred(task);
+	} else
+		rpcauth_bindcred(task);
+
+	if (task->tk_status == 0)
+		task->tk_action = call_bind;
+	else
+		task->tk_action = NULL;
 
 	/* Increment call count */
-	rpcproc_count(task->tk_client, proc)++;
+	if (task->tk_proc < task->tk_client->cl_maxproc)
+		rpcproc_count(task->tk_client, task->tk_proc)++;
 }
 
 /*
@@ -305,7 +351,6 @@
 rpc_restart_call(struct rpc_task *task)
 {
 	if (task->tk_flags & RPC_TASK_KILLED) {
-		rpc_release_task(task);
 		return;
 	}
 	task->tk_action = call_bind;
@@ -319,11 +364,25 @@
 call_bind(struct rpc_task *task)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
+	struct rpc_xprt *xprt = task->tk_xprt;
+
+	if (task->tk_proc >= clnt->cl_maxproc) {
+		printk(KERN_WARNING "%s (vers %d): bad procedure number %d\n",
+			clnt->cl_protname, clnt->cl_vers, task->tk_proc);
+		rpc_exit(task, -EIO);
+		return;
+	}
 
 	task->tk_action = call_reserve;
 	task->tk_status = 0;
-	if (!clnt->cl_port)
+
+	if (!clnt->cl_port) {
+		if (xprt->stream) {
+			xprt->connected = 0;
+			task->tk_action = call_reconnect;
+		}
 		rpc_getport(task, clnt);
+	}
 }
 
 /*
@@ -334,22 +393,24 @@
 {
 	struct rpc_clnt	*clnt = task->tk_client;
 
-	dprintk("RPC: %4d call_reserve\n", task->tk_pid);
 	if (!clnt->cl_port) {
 		printk(KERN_NOTICE "%s: couldn't bind to server %s - %s.\n",
 			clnt->cl_protname, clnt->cl_server,
 			clnt->cl_softrtry? "giving up" : "retrying");
 		if (!clnt->cl_softrtry) {
+			task->tk_action = call_bind;
 			rpc_delay(task, 5*HZ);
-			return;
-		}
-		rpc_exit(task, -EIO);
+		} else
+			rpc_exit(task, -EIO);
 		return;
 	}
+
+	dprintk("RPC: %4d call_reserve\n", task->tk_pid);
 	if (!rpcauth_uptodatecred(task)) {
 		task->tk_action = call_refresh;
 		return;
 	}
+
 	task->tk_action  = call_reserveresult;
 	task->tk_timeout = clnt->cl_timeout.to_resrvval;
 	task->tk_status  = 0;
@@ -376,25 +437,27 @@
 
 	if (task->tk_status >= 0) {
 		task->tk_action = call_allocate;
-		goto out;
-	} else if (task->tk_status == -EAGAIN) {
+		return;
+	}
+
+	switch (task->tk_status) {
+	case -EAGAIN:
+	case -ENOBUFS:
 		task->tk_timeout = task->tk_client->cl_timeout.to_resrvval;
 		task->tk_status = 0;
-		xprt_reserve(task);
-		goto out;
-	} else if (task->tk_status == -ETIMEDOUT) {
+		task->tk_action = call_reserve;
+		break;
+	case -ETIMEDOUT:
 		dprintk("RPC: task timed out\n");
 		task->tk_action = call_timeout;
-		goto out;
-	} else {
+		break;
+	default:
 		task->tk_action = NULL;
+		if (!task->tk_rqstp) {
+			printk("RPC: task has no request, exit EIO\n");
+			rpc_exit(task, -EIO);
+		}
 	}
-	if (!task->tk_rqstp) {
-		printk("RPC: task has no request, exit EIO\n");
-		rpc_exit(task, -EIO);
-	}
-out:
-	return;
 }
 
 /*
@@ -421,10 +484,10 @@
 		return;
 	printk("RPC: buffer allocation failed for task %p\n", task); 
 
-	if (!signalled()) {
+	if (RPC_IS_ASYNC(task) || !(task->tk_client->cl_intr && signalled())) {
 		xprt_release(task);
 		task->tk_action = call_reserve;
-		rpc_delay(task, HZ);
+		rpc_delay(task, HZ>>4);
 		return;
 	}
 
@@ -459,13 +522,7 @@
 	req->rq_rvec[0].iov_len  = bufsiz;
 	req->rq_rlen		 = bufsiz;
 	req->rq_rnr		 = 1;
-
-	if (task->tk_proc > clnt->cl_maxproc) {
-		printk(KERN_WARNING "%s (vers %d): bad procedure number %d\n",
-			clnt->cl_protname, clnt->cl_vers, task->tk_proc);
-		rpc_exit(task, -EIO);
-		return;
-	}
+	req->rq_damaged		 = 0;
 
 	/* Zero buffer so we have automatic zero-padding of opaque & string */
 	memset(task->tk_buffer, 0, bufsiz);
@@ -476,7 +533,7 @@
 		printk("RPC: call_header failed, exit EIO\n");
 		rpc_exit(task, -EIO);
 	} else
-	if ((status = encode(req, p, task->tk_argp)) < 0) {
+	if (encode && (status = encode(req, p, task->tk_argp)) < 0) {
 		printk(KERN_WARNING "%s: can't encode arguments: %d\n",
 				clnt->cl_protname, -status);
 		rpc_exit(task, status);
@@ -484,7 +541,7 @@
 }
 
 /*
- * 4.	Transmit the RPC request
+ * 4.	Transmit the RPC request, and wait for reply
  */
 static void
 call_transmit(struct rpc_task *task)
@@ -492,43 +549,19 @@
 	dprintk("RPC: %4d call_transmit (status %d)\n", 
 				task->tk_pid, task->tk_status);
 
-	task->tk_action = call_receive;
 	task->tk_status = 0;
-	xprt_transmit(task);
-}
-
-/*
- * 5.	Wait for the RPC reply
- */
-static void
-call_receive(struct rpc_task *task)
-{
-	dprintk("RPC: %4d call_receive (status %d)\n", 
-		task->tk_pid, task->tk_status);
-
 	task->tk_action = call_status;
-	/* In case of error, evaluate status */
-	if (task->tk_status < 0)
-		return;
-
-	/* If we have no decode function, this means we're performing
-	 * a void call (a la lockd message passing). */
-	if (!rpcproc_decode(task->tk_client, task->tk_proc)) {
-		rpc_remove_wait_queue(task); /* remove from xprt_pending */
-		task->tk_action = NULL;
-		return;
-	}
-
-	xprt_receive(task);
+	xprt_transmit(task);
 }
 
 /*
- * 6.	Sort out the RPC call status
+ * 5.	Sort out the RPC call status
  */
 static void
 call_status(struct rpc_task *task)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
+	struct rpc_xprt	*xprt = clnt->cl_xprt;
 	struct rpc_rqst	*req;
 	int		status = task->tk_status;
 
@@ -537,33 +570,51 @@
 
 	if (status >= 0) {
 		task->tk_action = call_decode;
-	} else if (status == -ETIMEDOUT) {
+		return;
+	}
+
+	task->tk_status = 0;
+	req = task->tk_rqstp;
+	switch(status) {
+	case -ETIMEDOUT:
 		task->tk_action = call_timeout;
-	} else if (status == -EAGAIN) {
-		if (!(req = task->tk_rqstp))
+		break;
+	case -EAGAIN:
+		if (!req)
 			task->tk_action = call_reserve;
 		else if (!task->tk_buffer)
 			task->tk_action = call_allocate;
-		else if (req->rq_damaged)
+		else if (req->rq_damaged) {
 			task->tk_action = call_encode;
-		else
+			clnt->cl_stats->rpcretrans++;
+		} else
 			task->tk_action = call_transmit;
-	} else if (status == -ENOTCONN) {
-		task->tk_action = call_reconnect;
-	} else if (status == -ECONNREFUSED && clnt->cl_autobind) {
-		task->tk_action = call_bind;
-		clnt->cl_port = 0;
-	} else {
+		break;
+	case -ECONNREFUSED:
+	case -ENOTCONN:
+		if (clnt->cl_autobind || !clnt->cl_port) {
+			clnt->cl_port = 0;
+			task->tk_action = call_bind;
+		} else if (xprt->stream)
+			task->tk_action = call_reconnect;
+		else {
+			if (req->rq_damaged)
+				task->tk_action = call_encode;
+			else
+				task->tk_action = call_transmit;
+			clnt->cl_stats->rpcretrans++;
+		}
+		break;
+	default:
 		if (clnt->cl_chatty)
 			printk("%s: RPC call returned error %d\n",
-				clnt->cl_protname, -status);
-		task->tk_action = NULL;
-		return;
+			       clnt->cl_protname, -status);
+		rpc_exit(task, status);
 	}
 }
 
 /*
- * 6a.	Handle RPC timeout
+ * 5a.	Handle RPC timeout
  * 	We do not release the request slot, so we keep using the
  *	same XID for all retransmits.
  */
@@ -581,9 +632,11 @@
 				task->tk_pid);
 			goto minor_timeout;
 		}
-		to->to_initval <<= 1;
-		if (to->to_initval > to->to_maxval)
-			to->to_initval = to->to_maxval;
+		to->to_retries = clnt->cl_timeout.to_retries;
+	} else {
+		printk("%s: task %d can't get a request slot\n",
+		       clnt->cl_protname, task->tk_pid);
+		goto minor_timeout;
 	}
 
 	dprintk("RPC: %4d call_timeout (major timeo)\n", task->tk_pid);
@@ -594,21 +647,24 @@
 		rpc_exit(task, -EIO);
 		return;
 	}
+
 	if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) {
 		task->tk_flags |= RPC_CALL_MAJORSEEN;
-		if (req)
-			printk("%s: server %s not responding, still trying\n",
-				clnt->cl_protname, clnt->cl_server);
-		else 
-			printk("%s: task %d can't get a request slot\n",
-				clnt->cl_protname, task->tk_pid);
+		printk("%s: server %s not responding, timed out\n",
+		       clnt->cl_protname, clnt->cl_server);
+	} else if (clnt->cl_chatty) {
+		printk("%s: server %s not responding, still trying\n",
+		       clnt->cl_protname, clnt->cl_server);
 	}
+
 	if (clnt->cl_autobind)
 		clnt->cl_port = 0;
 
 minor_timeout:
 	if (!clnt->cl_port) {
 		task->tk_action = call_bind;
+	} else if (clnt->cl_xprt->stream && !clnt->cl_xprt->connected) {
+		task->tk_action = call_reconnect;
 	} else if (!req) {
 		task->tk_action = call_reserve;
 	} else if (req->rq_damaged) {
@@ -622,24 +678,22 @@
 }
 
 /*
- * 6b.	Reconnect to the RPC server (TCP case)
+ * 5b.	Reconnect to the RPC server (TCP case)
  */
 static void
 call_reconnect(struct rpc_task *task)
 {
+	struct rpc_clnt *clnt = task->tk_client;
 	dprintk("RPC: %4d call_reconnect status %d\n",
 				task->tk_pid, task->tk_status);
-	if (task->tk_status == 0) {
-		task->tk_action = call_status;
-		task->tk_status = -EAGAIN;
-		return;
-	}
-	task->tk_client->cl_stats->netreconn++;
+	task->tk_action = call_reserve;
+	task->tk_status = 0;
+	clnt->cl_stats->netreconn++;
 	xprt_reconnect(task);
 }
 
 /*
- * 7.	Decode the RPC reply
+ * 6.	Decode the RPC reply
  */
 static void
 call_decode(struct rpc_task *task)
@@ -684,13 +738,15 @@
 	}
 
 	task->tk_action = NULL;
-	task->tk_status = decode(req, p, task->tk_resp);
+
+	if (decode)
+		task->tk_status = decode(req, p, task->tk_resp);
 	dprintk("RPC: %4d call_decode result %d\n", task->tk_pid,
 					task->tk_status);
 }
 
 /*
- * 8.	Refresh the credentials if rejected by the server
+ * 7.	Refresh the credentials if rejected by the server
  */
 static void
 call_refresh(struct rpc_task *task)
@@ -705,7 +761,7 @@
 }
 
 /*
- * 8a.	Process the results of a credential refresh
+ * 7a.	Process the results of a credential refresh
  */
 static void
 call_refreshresult(struct rpc_task *task)
@@ -713,11 +769,10 @@
 	dprintk("RPC: %4d call_refreshresult (status %d)\n", 
 				task->tk_pid, task->tk_status);
 
-	if (task->tk_status < 0) {
-		task->tk_status = -EACCES;
-		task->tk_action = NULL;
-	} else
-		task->tk_action = call_reserve;
+	if (task->tk_status < 0)
+		rpc_exit(task, -EACCES);
+	else
+		task->tk_action = call_bind;
 }
 
 /*
--- linux/net/sunrpc/pmap_clnt.c.nfsattack-gafton	Sun Oct 12 13:17:46 1997
+++ linux/net/sunrpc/pmap_clnt.c	Fri Feb  4 23:26:34 2000
@@ -41,6 +41,7 @@
 {
 	struct rpc_portmap *map = &clnt->cl_pmap;
 	struct sockaddr_in *sap = &clnt->cl_xprt->addr;
+	struct rpc_message msg = { PMAP_GETPORT, map, &clnt->cl_port, NULL };
 	struct rpc_clnt	*pmap_clnt;
 	struct rpc_task	*child;
 
@@ -66,7 +67,7 @@
 		goto bailout;
 
 	/* Setup the call info struct */
-	rpc_call_setup(child, PMAP_GETPORT, map, &clnt->cl_port, 0);
+	rpc_call_setup(child, &msg, 0);
 
 	/* ... and run the child task */
 	rpc_run_child(task, child, pmap_getport_done);
@@ -121,7 +122,7 @@
 		task->tk_action = NULL;
 	} else if (clnt->cl_port == 0) {
 		/* Program not registered */
-		task->tk_status = -EACCES;
+		task->tk_status = -EPROTONOSUPPORT;
 		task->tk_action = NULL;
 	} else {
 		/* byte-swap port number first */
--- linux/net/sunrpc/sched.c.nfsattack-gafton	Fri May  7 14:19:11 1999
+++ linux/net/sunrpc/sched.c	Fri Feb  4 23:26:34 2000
@@ -18,6 +18,7 @@
 #include <linux/unistd.h>
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
+#include <asm/spinlock.h>
 
 #include <linux/sunrpc/clnt.h>
 
@@ -74,6 +75,66 @@
 static int			swap_buffer_used = 0;
 
 /*
+ * Make allocation of the swap_buffer SMP-safe
+ */
+static __inline__ int rpc_lock_swapbuf(void)
+{
+        return !test_and_set_bit(1, &swap_buffer_used);
+}
+static __inline__ void rpc_unlock_swapbuf(void)
+{
+        clear_bit(1, &swap_buffer_used);
+}
+
+/*
+ * Set up a timer for the current task.
+ */
+inline void
+rpc_add_timer(struct rpc_task *task, rpc_action timer)
+{
+	unsigned long	expires = jiffies + task->tk_timeout;
+
+	dprintk("RPC: %4d setting alarm for %lu ms\n",
+			task->tk_pid, task->tk_timeout * 1000 / HZ);
+	if (del_timer(&task->tk_timer))
+		printk(KERN_ERR "RPC: Bug! Overwriting active timer\n");
+	if (!timer)
+		timer = __rpc_default_timer;
+	if (time_before(expires, jiffies)) {
+		printk(KERN_ERR "RPC: bad timeout value %ld - setting to 10 sec!\n",
+					task->tk_timeout);
+		expires = jiffies + 10 * HZ;
+	}
+	task->tk_timer.expires  = expires;
+	task->tk_timer.data     = (unsigned long) task;
+	task->tk_timer.function = (void (*)(unsigned long)) timer;
+	task->tk_timer.prev     = NULL;
+	task->tk_timer.next     = NULL;
+	add_timer(&task->tk_timer);
+}
+
+/*
+ * Delete any timer for the current task.
+ */
+inline void
+rpc_del_timer(struct rpc_task *task)
+{
+	dprintk("RPC: %4d deleting timer\n", task->tk_pid);
+	del_timer(&task->tk_timer);
+	task->tk_timeout = 0;
+}
+
+/*
+ * Spinlock for wait queues. Access to the latter also has to be
+ * interrupt-safe in order to allow timers to wake up sleeping tasks.
+ */
+spinlock_t rpc_queue_lock = SPIN_LOCK_UNLOCKED;
+/*
+ * Spinlock for other critical sections of code.
+ */
+spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED;
+
+/*
  * Add new request to wait queue.
  *
  * Swapper tasks always get inserted at the head of the queue.
@@ -82,15 +143,16 @@
  * Everyone else gets appended to the queue to ensure proper FIFO behavior.
  */
 int
-rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
+__rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
 {
-	if (task->tk_rpcwait) {
-		if (task->tk_rpcwait != queue)
-		{
-			printk(KERN_WARNING "RPC: doubly enqueued task!\n");
-			return -EWOULDBLOCK;
-		}
+	if (task->tk_rpcwait == queue)
 		return 0;
+
+	if (task->tk_rpcwait) {
+		printk(KERN_WARNING "RPC: task already queued!\n");
+		dprintk("task already on %s, to be added to %s\n",
+			rpc_qname(task->tk_rpcwait), rpc_qname(queue));
+		return -EWOULDBLOCK;
 	}
 	if (RPC_IS_SWAPPER(task))
 		rpc_insert_list(&queue->task, task);
@@ -104,17 +166,30 @@
 	return 0;
 }
 
+int
+rpc_add_wait_queue(struct rpc_wait_queue *q, struct rpc_task *task)
+{
+	unsigned long oldflags;
+	int result;
+
+	spin_lock_irqsave(&rpc_queue_lock, oldflags);
+	result = __rpc_add_wait_queue(q, task);
+	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+	return result;
+}
+
 /*
  * Remove request from queue.
- * Note: must be called with interrupts disabled.
+ * Note: must be called with spin lock held.
  */
 void
-rpc_remove_wait_queue(struct rpc_task *task)
+__rpc_remove_wait_queue(struct rpc_task *task)
 {
-	struct rpc_wait_queue *queue;
+	struct rpc_wait_queue *queue = task->tk_rpcwait;
 
-	if (!(queue = task->tk_rpcwait))
+	if (!task->tk_rpcwait)
 		return;
+
 	rpc_remove_list(&queue->task, task);
 	task->tk_rpcwait = NULL;
 
@@ -122,50 +197,22 @@
 				task->tk_pid, queue, rpc_qname(queue));
 }
 
-/*
- * Set up a timer for the current task.
- */
-inline void
-rpc_add_timer(struct rpc_task *task, rpc_action timer)
+void
+rpc_remove_wait_queue(struct rpc_task *task)
 {
-	unsigned long	expires = jiffies + task->tk_timeout;
-
-	dprintk("RPC: %4d setting alarm for %lu ms\n",
-			task->tk_pid, task->tk_timeout * 1000 / HZ);
-	if (!timer)
-		timer = __rpc_default_timer;
-	if (time_before(expires, jiffies)) {
-		printk(KERN_ERR "RPC: bad timeout value %ld - setting to 10 sec!\n",
-					task->tk_timeout);
-		expires = jiffies + 10 * HZ;
-	}
-	task->tk_timer.expires  = expires;
-	task->tk_timer.data     = (unsigned long) task;
-	task->tk_timer.function = (void (*)(unsigned long)) timer;
-	task->tk_timer.prev     = NULL;
-	task->tk_timer.next     = NULL;
-	add_timer(&task->tk_timer);
-}
+	unsigned long oldflags;
 
-/*
- * Delete any timer for the current task.
- * Must be called with interrupts off.
- */
-inline void
-rpc_del_timer(struct rpc_task *task)
-{
-	if (task->tk_timeout) {
-		dprintk("RPC: %4d deleting timer\n", task->tk_pid);
-		del_timer(&task->tk_timer);
-		task->tk_timeout = 0;
-	}
+	spin_lock_irqsave(&rpc_queue_lock, oldflags);
+	rpc_del_timer(task);
+	__rpc_remove_wait_queue(task);
+	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
 }
 
 /*
  * Make an RPC task runnable.
  *
  * Note: If the task is ASYNC, this must be called with 
- * interrupts disabled to protect the wait queue operation.
+ * the spinlock held to protect the wait queue operation.
  */
 static inline void
 rpc_make_runnable(struct rpc_task *task)
@@ -176,17 +223,33 @@
 	}
 	task->tk_flags |= RPC_TASK_RUNNING;
 	if (RPC_IS_ASYNC(task)) {
-		int status;
-		status = rpc_add_wait_queue(&schedq, task);
-		if (status)
-		{
-			printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
-			task->tk_status = status;
+		if (RPC_IS_SLEEPING(task)) {
+			int status;
+			status = __rpc_add_wait_queue(&schedq, task);
+			if (status) {
+				printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
+				task->tk_status = status;
+			}
 		}
 		wake_up(&rpciod_idle);
 	} else {
 		wake_up(&task->tk_wait);
 	}
+	task->tk_sleeping = 0;
+}
+
+/*
+ * Place a newly initialized task on the schedq.
+ */
+static inline void
+rpc_schedule_run(struct rpc_task *task)
+{
+	/* Don't run a child twice! */
+	if (RPC_IS_ACTIVATED(task))
+		return;
+	task->tk_active = 1;
+	task->tk_sleeping = 1;
+	rpc_make_runnable(task);
 }
 
 
@@ -214,33 +277,23 @@
 __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
 			rpc_action action, rpc_action timer)
 {
-	unsigned long	oldflags;
 	int status;
 
 	dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid,
 				rpc_qname(q), jiffies);
 
-	/*
-	 * Protect the execution below.
-	 */
-	save_flags(oldflags); cli();
-
-	status = rpc_add_wait_queue(q, task);
-	if (status)
-	{
+	status = __rpc_add_wait_queue(q, task);
+	if (status) {
 		printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
 		task->tk_status = status;
-		task->tk_flags |= RPC_TASK_RUNNING;
 	}
-	else
-	{
+	else {
 		task->tk_callback = action;
 		if (task->tk_timeout)
 			rpc_add_timer(task, timer);
 		task->tk_flags &= ~RPC_TASK_RUNNING;
 	}
 
-	restore_flags(oldflags);
 	return;
 }
 
@@ -248,7 +301,51 @@
 rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
 				rpc_action action, rpc_action timer)
 {
+	unsigned long	oldflags;
+
+	if (!RPC_IS_ASYNC(task) && !RPC_IS_ACTIVATED(task)) {
+		printk(KERN_ERR "RPC: Inactive synchronous task put to sleep!\n");
+		return;
+	}
+
+	/*
+	 * Protect the queue operations.
+	 */
+	spin_lock_irqsave(&rpc_queue_lock, oldflags);
+	/* Mark the task as being activated if so needed */
+	if (!RPC_IS_ACTIVATED(task)) {
+		task->tk_active = 1;
+		task->tk_sleeping = 1;
+	}
+
 	__rpc_sleep_on(q, task, action, timer);
+	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+}
+
+void
+rpc_sleep_locked(struct rpc_wait_queue *q, struct rpc_task *task,
+		 rpc_action action, rpc_action timer)
+{
+	unsigned long	oldflags;
+
+	if (!RPC_IS_ASYNC(task) && !RPC_IS_ACTIVATED(task)) {
+		printk(KERN_ERR "RPC: Inactive synchronous task put to sleep!\n");
+		return;
+	}
+
+	/*
+	 * Protect the queue operations.
+	 */
+	spin_lock_irqsave(&rpc_queue_lock, oldflags);
+	/* Mark the task as being activated if so needed */
+	if (!RPC_IS_ACTIVATED(task)) {
+		task->tk_active = 1;
+		task->tk_sleeping = 1;
+	}
+
+	__rpc_sleep_on(q, task, action, timer);
+	task->tk_lock++;
+	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
 }
 
 /*
@@ -267,17 +364,31 @@
 	if (task->tk_magic != 0xf00baa) {
 		printk(KERN_ERR "RPC: attempt to wake up non-existing task!\n");
 		rpc_debug = ~0;
+		rpc_show_tasks();
 		return;
 	}
 #endif
+	/* Has the task been executed yet? If not, we cannot wake it up! */
+	if (!RPC_IS_ACTIVATED(task))
+		return;
+
 	rpc_del_timer(task);
+
+	/* If the task has been locked, then set tk_wakeup so that
+	 * rpc_unlock_task() wakes us up... */
+	if (task->tk_lock) {
+		task->tk_wakeup = 1;
+		return;
+	} else
+		task->tk_wakeup = 0;
+
 	if (task->tk_rpcwait != &schedq)
-		rpc_remove_wait_queue(task);
+		__rpc_remove_wait_queue(task);
 	if (!RPC_IS_RUNNING(task)) {
 		task->tk_flags |= RPC_TASK_CALLBACK;
 		rpc_make_runnable(task);
 	}
-	dprintk("RPC:      __rpc_wake_up done\n");
+	/* dprintk("RPC:      __rpc_wake_up done\n"); */
 }
 
 /*
@@ -286,10 +397,10 @@
 static void
 __rpc_default_timer(struct rpc_task *task)
 {
-	dprintk("RPC: %d timeout (default timer)\n", task->tk_pid);
+	dprintk("RPC: %4d timeout (default timer)\n", task->tk_pid);
 	task->tk_status = -ETIMEDOUT;
 	task->tk_timeout = 0;
-	__rpc_wake_up(task);
+	rpc_wake_up_task(task);
 }
 
 /*
@@ -300,9 +411,11 @@
 {
 	unsigned long	oldflags;
 
-	save_flags(oldflags); cli();
+	if (RPC_IS_RUNNING(task))
+		return;
+	spin_lock_irqsave(&rpc_queue_lock, oldflags);
 	__rpc_wake_up(task);
-	restore_flags(oldflags);
+	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
 }
 
 /*
@@ -315,10 +428,10 @@
 	struct rpc_task	*task;
 
 	dprintk("RPC:      wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
-	save_flags(oldflags); cli();
+	spin_lock_irqsave(&rpc_queue_lock, oldflags);
 	if ((task = queue->task) != 0)
 		__rpc_wake_up(task);
-	restore_flags(oldflags);
+	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
 
 	return task;
 }
@@ -331,10 +444,10 @@
 {
 	unsigned long	oldflags;
 
-	save_flags(oldflags); cli();
+	spin_lock_irqsave(&rpc_queue_lock, oldflags);
 	while (queue->task)
 		__rpc_wake_up(queue->task);
-	restore_flags(oldflags);
+	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
 }
 
 /*
@@ -346,12 +459,42 @@
 	struct rpc_task	*task;
 	unsigned long	oldflags;
 
-	save_flags(oldflags); cli();
+	spin_lock_irqsave(&rpc_queue_lock, oldflags);
 	while ((task = queue->task) != NULL) {
 		task->tk_status = status;
 		__rpc_wake_up(task);
 	}
-	restore_flags(oldflags);
+	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+}
+
+/*
+ * Lock down a sleeping task to prevent it from waking up
+ * and disappearing from beneath us.
+ *
+ * This function should always be called with the
+ * rpc_queue_lock held.
+ */
+int
+rpc_lock_task(struct rpc_task *task)
+{
+	if (!RPC_IS_RUNNING(task)) {
+		task->tk_lock++;
+		return 1;
+	}
+	return 0;
+}
+
+void
+rpc_unlock_task(struct rpc_task *task)
+{
+	unsigned long	oldflags;
+	spin_lock_irqsave(&rpc_queue_lock, oldflags);
+	if (task->tk_lock) {
+		task->tk_lock--;
+		if (!task->tk_lock && task->tk_wakeup)
+			__rpc_wake_up(task);
+	}
+	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
 }
 
 /*
@@ -369,7 +512,7 @@
 __rpc_atrun(struct rpc_task *task)
 {
 	task->tk_status = 0;
-	__rpc_wake_up(task);
+	rpc_wake_up_task(task);
 }
 
 /*
@@ -379,7 +522,8 @@
 __rpc_execute(struct rpc_task *task)
 {
 	unsigned long	oldflags;
-	int		status = 0;
+	int		status = 0,
+			loop_count = 0;
 
 	dprintk("RPC: %4d rpc_execute flgs %x\n",
 				task->tk_pid, task->tk_flags);
@@ -389,11 +533,12 @@
 		return 0;
 	}
 
+restarted:
 	while (1) {
 		/*
 		 * Execute any pending callback.
 		 */
-		if (task->tk_flags & RPC_TASK_CALLBACK) {
+		if (RPC_DO_CALLBACK(task)) {
 			/* Define a callback save pointer */
 			void (*save_callback)(struct rpc_task *);
 	
@@ -413,56 +558,64 @@
 		}
 
 		/*
-		 * No handler for next step means exit.
-		 */
-		if (!task->tk_action)
-			break;
-
-		/*
 		 * Perform the next FSM step.
 		 * tk_action may be NULL when the task has been killed
 		 * by someone else.
 		 */
-		if (RPC_IS_RUNNING(task) && task->tk_action)
+		if (RPC_IS_RUNNING(task)) {
+			if (!task->tk_action)
+				break;
 			task->tk_action(task);
+		}
 
 		/*
 		 * Check whether task is sleeping.
-		 * Note that if the task may go to sleep in tk_action,
+		 * Note that if the task goes to sleep in tk_action,
 		 * and the RPC reply arrives before we get here, it will
 		 * have state RUNNING, but will still be on schedq.
+		 * 27/9/99: An attempt has been made to fix the above by
+		 *          introducing task->tk_sleeping.
 		 */
-		save_flags(oldflags); cli();
+		spin_lock_irqsave(&rpc_queue_lock, oldflags);
 		if (RPC_IS_RUNNING(task)) {
-			if (task->tk_rpcwait == &schedq)
-				rpc_remove_wait_queue(task);
-		} else while (!RPC_IS_RUNNING(task)) {
+			if (task->tk_rpcwait == &schedq) {
+				printk(KERN_ERR "RPC: running task was placed on schedq!\n");
+				__rpc_remove_wait_queue(task);
+			}
+		} else {
+			task->tk_sleeping = 1;
 			if (RPC_IS_ASYNC(task)) {
-				restore_flags(oldflags);
+				spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
 				return 0;
 			}
+		}
+		spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
 
+		while (RPC_IS_SLEEPING(task)) {
 			/* sync task: sleep here */
 			dprintk("RPC: %4d sync task going to sleep\n",
 							task->tk_pid);
 			if (current->pid == rpciod_pid)
 				printk(KERN_ERR "RPC: rpciod waiting on sync task!\n");
 
-			sti();
-			__wait_event(task->tk_wait, RPC_IS_RUNNING(task));
-			cli();
+			wait_event(task->tk_wait, RPC_IS_RUNNING(task));
+
+			/* If signalled() but task is locked we need to wait. */
+			if (loop_count++ > 200) {
+				loop_count = 0;
+				schedule();
+			}
 
 			/*
 			 * When the task received a signal, remove from
 			 * any queues etc, and make runnable again.
 			 */
-			if (signalled())
-				__rpc_wake_up(task);
+			if (task->tk_client->cl_intr && signalled())
+				rpc_wake_up_task(task);
 
 			dprintk("RPC: %4d sync task resuming\n",
 							task->tk_pid);
 		}
-		restore_flags(oldflags);
 
 		/*
 		 * When a sync task receives a signal, it exits with
@@ -470,44 +623,75 @@
 		 * clean up after sleeping on some queue, we don't
 		 * break the loop here, but go around once more.
 		 */
-		if (!RPC_IS_ASYNC(task) && signalled()) {
+		if (!RPC_IS_ASYNC(task) && task->tk_client->cl_intr && signalled()) {
 			dprintk("RPC: %4d got signal\n", task->tk_pid);
 			rpc_exit(task, -ERESTARTSYS);
 		}
+		if (loop_count++ > 200) {
+			loop_count = 0;
+			schedule();
+		}
 	}
 
 	dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status);
 	if (task->tk_exit) {
-		status = task->tk_status;
 		task->tk_exit(task);
+		/* If tk_action is non-null, the user wants us to restart */
+		if (task->tk_action) {
+			if ((RPC_IS_ASYNC(task)
+			     || !(task->tk_client->cl_intr && signalled()))
+			     && !RPC_ASSASSINATED(task)) {
+					/* Release RPC slot and buffer memory */
+					if (task->tk_rqstp)
+						xprt_release(task);
+					if (task->tk_buffer) {
+						rpc_free(task->tk_buffer);
+						task->tk_buffer = NULL;
+					}
+					goto restarted;
+			}
+			printk("RPC: dead task tries to walk away.\n");
+		}
 	}
 
+	/* Save the task exit status */
+	status = task->tk_status;
+
+	/* Release all resources associated with the task */
+	rpc_release_task(task);
 	return status;
 }
 
 /*
  * User-visible entry point to the scheduler.
- * The recursion protection is for debugging. It should go away once
- * the code has stabilized.
+ *
+ * This may be called recursively if e.g. an async NFS task updates
+ * the attributes and finds that dirty pages must be flushed.
  */
-void
+int
 rpc_execute(struct rpc_task *task)
 {
 	static int	executing = 0;
 	int		incr = RPC_IS_ASYNC(task)? 1 : 0;
+	int		status;
 
 	if (incr) {
 		if (rpc_inhibit) {
 			printk(KERN_INFO "RPC: execution inhibited!\n");
-			return;
+			/* Nobody else will release the task now */
+			rpc_release_task(task);
+			return -EIO;
 		}
 		if (executing)
 			printk(KERN_WARNING "RPC: %d tasks executed\n", executing);
 	}
+	task->tk_flags |= RPC_TASK_RUNNING;
+	task->tk_active = 1;
 	
 	executing += incr;
-	__rpc_execute(task);
+	status = __rpc_execute(task);
 	executing -= incr;
+	return status;
 }
 
 /*
@@ -519,30 +701,35 @@
 	struct rpc_task	*task;
 	int		count = 0;
 	unsigned long	oldflags;
-	int need_resched = current->need_resched;
 
-	dprintk("RPC:      rpc_schedule enter\n");
-	save_flags(oldflags);
+	/* dprintk("RPC:      rpc_schedule enter\n"); */
 	while (1) {
-		cli();
-		if (!(task = schedq.task))
+		spin_lock_irqsave(&rpc_queue_lock, oldflags);
+		if (!(task = schedq.task)) {
+			spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
 			break;
+		}
+		if (task->tk_lock) {
+			spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+			printk(KERN_ERR "RPC: Locked task was scheduled !!!!\n");
+			rpc_debug = ~0;
+			rpc_show_tasks();
+			break;
+		}
 		rpc_del_timer(task);
-		rpc_remove_wait_queue(task);
 		task->tk_flags |= RPC_TASK_RUNNING;
-		restore_flags(oldflags);
+		__rpc_remove_wait_queue(task);
+		task->tk_sleeping = 0;
+		spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
 
 		__rpc_execute(task);
 
 		if (++count >= 200) {
 			count = 0;
-			need_resched = 1;
-		}
-		if (need_resched)
 			schedule();
+		}
 	}
-	restore_flags(oldflags);
-	dprintk("RPC:      rpc_schedule leave\n");
+	/* dprintk("RPC:      rpc_schedule leave\n"); */
 }
 
 /*
@@ -577,12 +764,13 @@
 	else
 		gfp = GFP_KERNEL;
 
-	do {
+	while (1) {
 		if ((buffer = (u32 *) kmalloc(size, gfp)) != NULL) {
 			dprintk("RPC:      allocated buffer %p\n", buffer);
 			return buffer;
 		}
-		if ((flags & RPC_TASK_SWAPPER) && !swap_buffer_used++) {
+		if ((flags & RPC_TASK_SWAPPER) && size <= sizeof(swap_buffer)
+		 && rpc_lock_swapbuf()) {
 			dprintk("RPC:      used last-ditch swap buffer\n");
 			return swap_buffer;
 		}
@@ -590,7 +778,8 @@
 			return NULL;
 		current->state = TASK_INTERRUPTIBLE;
 		schedule_timeout(HZ>>4);
-	} while (!signalled());
+		current->state = TASK_RUNNING;
+	}
 
 	return NULL;
 }
@@ -602,20 +791,21 @@
 		kfree(buffer);
 		return;
 	}
-	swap_buffer_used = 0;
+	rpc_unlock_swapbuf();
 }
 
 /*
  * Creation and deletion of RPC task structures
  */
-inline void
+void
 rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt,
 				rpc_action callback, int flags)
 {
 	memset(task, 0, sizeof(*task));
+	init_timer(&task->tk_timer);
 	task->tk_client = clnt;
-	task->tk_flags  = RPC_TASK_RUNNING | flags;
 	task->tk_exit   = callback;
+	task->tk_flags  = flags;
 	if (current->uid != current->fsuid || current->gid != current->fsgid)
 		task->tk_flags |= RPC_TASK_SETUID;
 
@@ -624,22 +814,26 @@
 	task->tk_cred_retry = 2;
 	task->tk_suid_retry = 1;
 
+#ifdef RPC_DEBUG
+	task->tk_magic = 0xf00baa;
+	task->tk_pid = rpc_task_id++;
+#endif
+
 	/* Add to global list of all tasks */
+	spin_lock(&rpc_sched_lock);
 	task->tk_next_task = all_tasks;
 	task->tk_prev_task = NULL;
 	if (all_tasks)
 		all_tasks->tk_prev_task = task;
 	all_tasks = task;
+	spin_unlock(&rpc_sched_lock);
 
 	if (clnt)
 		clnt->cl_users++;
 
-#ifdef RPC_DEBUG
-	task->tk_magic = 0xf00baa;
-	task->tk_pid = rpc_task_id++;
-#endif
-	dprintk("RPC: %4d new task procpid %d\n", task->tk_pid,
-				current->pid);
+	dprintk("RPC: %d new task procpid %d%s\n",
+			task->tk_pid, current->pid,
+			(flags & RPC_TASK_DYNAMIC) ? " (alloc)" : "");
 }
 
 /*
@@ -656,10 +850,7 @@
 	if (!task)
 		goto cleanup;
 
-	rpc_init_task(task, clnt, callback, flags);
-
-	dprintk("RPC: %4d allocated task\n", task->tk_pid);
-	task->tk_flags |= RPC_TASK_DYNAMIC;
+	rpc_init_task(task, clnt, callback, flags | RPC_TASK_DYNAMIC);
 out:
 	return task;
 
@@ -678,10 +869,12 @@
 rpc_release_task(struct rpc_task *task)
 {
 	struct rpc_task	*next, *prev;
+	unsigned long	oldflags;
 
 	dprintk("RPC: %4d release task\n", task->tk_pid);
 
 	/* Remove from global task list */
+	spin_lock(&rpc_sched_lock);
 	prev = task->tk_prev_task;
 	next = task->tk_next_task;
 	if (next)
@@ -690,12 +883,27 @@
 		prev->tk_next_task = next;
 	else
 		all_tasks = next;
+	task->tk_next_task = task->tk_prev_task = NULL;
+	spin_unlock(&rpc_sched_lock);
+
+	/* Protect the execution below. */
+	spin_lock_irqsave(&rpc_queue_lock, oldflags);
+
+	/* Delete any running timer */
+	rpc_del_timer(task);
+
+	/* Remove from any wait queue we're still on */
+	__rpc_remove_wait_queue(task);
+
+	task->tk_active = 0;
+
+	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
 
 	/* Release resources */
 	if (task->tk_rqstp)
 		xprt_release(task);
 	if (task->tk_cred)
-		rpcauth_releasecred(task);
+		rpcauth_unbindcred(task);
 	if (task->tk_buffer) {
 		rpc_free(task->tk_buffer);
 		task->tk_buffer = NULL;
@@ -711,9 +919,12 @@
 
 	if (task->tk_flags & RPC_TASK_DYNAMIC) {
 		dprintk("RPC: %4d freeing task\n", task->tk_pid);
+		if (task->tk_release)
+			task->tk_release(task);
 		task->tk_flags &= ~RPC_TASK_DYNAMIC;
 		rpc_free(task);
-	}
+	} else if (task->tk_release)
+			task->tk_release(task);
 }
 
 /*
@@ -737,13 +948,15 @@
 static void
 rpc_child_exit(struct rpc_task *child)
 {
+	unsigned long	oldflags;
 	struct rpc_task	*parent;
 
+	spin_lock_irqsave(&rpc_queue_lock, oldflags);
 	if ((parent = rpc_find_parent(child)) != NULL) {
 		parent->tk_status = child->tk_status;
-		rpc_wake_up_task(parent);
+		__rpc_wake_up(parent);
 	}
-	rpc_release_task(child);
+	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
 }
 
 /*
@@ -771,11 +984,11 @@
 {
 	unsigned long oldflags;
 
-	save_flags(oldflags); cli();
-	rpc_make_runnable(child);
-	restore_flags(oldflags);
+	spin_lock_irqsave(&rpc_queue_lock, oldflags);
 	/* N.B. Is it possible for the child to have already finished? */
-	rpc_sleep_on(&childq, task, func, NULL);
+	__rpc_sleep_on(&childq, task, func, NULL);
+	rpc_schedule_run(child);
+	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
 }
 
 /*
@@ -788,8 +1001,10 @@
 	struct rpc_task	**q, *rovr;
 
 	dprintk("RPC:      killing all tasks for client %p\n", clnt);
-	/* N.B. Why bother to inhibit? Nothing blocks here ... */
-	rpc_inhibit++;
+	/*
+	 * Spin lock all_tasks to prevent changes...
+	 */
+	spin_lock(&rpc_sched_lock);
 	for (q = &all_tasks; (rovr = *q); q = &rovr->tk_next_task) {
 		if (!clnt || rovr->tk_client == clnt) {
 			rovr->tk_flags |= RPC_TASK_KILLED;
@@ -797,11 +1012,17 @@
 			rpc_wake_up_task(rovr);
 		}
 	}
-	rpc_inhibit--;
+	spin_unlock(&rpc_sched_lock);
 }
 
 static struct semaphore rpciod_running = MUTEX_LOCKED;
 
+static inline int
+rpciod_task_pending(void)
+{
+	return schedq.task != NULL || xprt_tcp_pending();
+}
+
 /*
  * This is the rpciod kernel thread
  */
@@ -809,11 +1030,12 @@
 rpciod(void *ptr)
 {
 	struct wait_queue **assassin = (struct wait_queue **) ptr;
-	unsigned long	oldflags;
 	int		rounds = 0;
 
 	MOD_INC_USE_COUNT;
+
 	lock_kernel();
+
 	/*
 	 * Let our maker know we're running ...
 	 */
@@ -840,22 +1062,19 @@
 		}
 		__rpc_schedule();
 
-		if (++rounds >= 64) {	/* safeguard */
+		if (++rounds >= 64 || current->need_resched) {	/* safeguard */
 			schedule();
 			rounds = 0;
 		}
-		save_flags(oldflags); cli();
 		dprintk("RPC: rpciod running checking dispatch\n");
 		rpciod_tcp_dispatcher();
 
-		if (!schedq.task) {
+		if (!rpciod_task_pending()) {
 			dprintk("RPC: rpciod back to sleep\n");
-			interruptible_sleep_on(&rpciod_idle);
+			wait_event_interruptible(rpciod_idle, rpciod_task_pending());
 			dprintk("RPC: switch to rpciod\n");
-			rpciod_tcp_dispatcher();
 			rounds = 0;
 		}
-		restore_flags(oldflags);
 	}
 
 	dprintk("RPC: rpciod shutdown commences\n");
@@ -885,6 +1104,7 @@
 			dprintk("rpciod_killall: waiting for tasks to exit\n");
 			current->state = TASK_INTERRUPTIBLE;
 			schedule_timeout(1);
+			current->state = TASK_RUNNING;
 		}
 	}
 
@@ -956,6 +1176,7 @@
 	current->sigpending = 0;
 	current->state = TASK_INTERRUPTIBLE;
 	schedule_timeout(1);
+	current->state = TASK_RUNNING;
 	/*
 	 * Display a message if we're going to wait longer.
 	 */
@@ -975,15 +1196,16 @@
 	MOD_DEC_USE_COUNT;
 }
 
-#ifdef RPC_DEBUG
-#include <linux/nfs_fs.h>
 void rpc_show_tasks(void)
 {
 	struct rpc_task *t = all_tasks, *next;
-	struct nfs_wreq *wreq;
 
-	if (!t)
+	spin_lock(&rpc_sched_lock);
+	t = all_tasks;
+	if (!t) {
+		spin_unlock(&rpc_sched_lock);
 		return;
+	}
 	printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
 		"-rpcwait -action- --exit--\n");
 	for (; t; t = next) {
@@ -994,17 +1216,6 @@
 			t->tk_rqstp, t->tk_timeout,
 			t->tk_rpcwait ? rpc_qname(t->tk_rpcwait) : " <NULL> ",
 			t->tk_action, t->tk_exit);
-
-		if (!(t->tk_flags & RPC_TASK_NFSWRITE))
-			continue;
-		/* NFS write requests */
-		wreq = (struct nfs_wreq *) t->tk_calldata;
-		printk("     NFS: flgs=%08x, pid=%d, pg=%p, off=(%d, %d)\n",
-			wreq->wb_flags, wreq->wb_pid, wreq->wb_page,
-			wreq->wb_offset, wreq->wb_bytes);
-		printk("          name=%s/%s\n",
-			wreq->wb_file->f_dentry->d_parent->d_name.name,
-			wreq->wb_file->f_dentry->d_name.name);
 	}
+	spin_unlock(&rpc_sched_lock);
 }
-#endif
--- linux/net/sunrpc/stats.c.nfsattack-gafton	Sat Jan  2 20:55:06 1999
+++ linux/net/sunrpc/stats.c	Fri Feb  4 23:26:34 2000
@@ -128,6 +128,7 @@
 {
 	struct proc_dir_entry	*ent;
 
+	rpc_proc_init();
 	dprintk("RPC: registering /proc/net/rpc/%s\n", name);
 	ent = create_proc_entry(name, 0, proc_net_rpc);
 	ent->read_proc = issvc? svc_proc_read : rpc_proc_read;
--- linux/net/sunrpc/sunrpc_syms.c.nfsattack-gafton	Wed Aug 19 19:16:04 1998
+++ linux/net/sunrpc/sunrpc_syms.c	Fri Feb  4 23:26:34 2000
@@ -26,13 +26,16 @@
 EXPORT_SYMBOL(rpc_allocate);
 EXPORT_SYMBOL(rpc_free);
 EXPORT_SYMBOL(rpc_execute);
+EXPORT_SYMBOL(rpc_new_task);
 EXPORT_SYMBOL(rpc_init_task);
 EXPORT_SYMBOL(rpc_release_task);
 EXPORT_SYMBOL(rpc_sleep_on);
+EXPORT_SYMBOL(rpc_wake_up);
 EXPORT_SYMBOL(rpc_wake_up_next);
 EXPORT_SYMBOL(rpc_wake_up_task);
 EXPORT_SYMBOL(rpc_new_child);
 EXPORT_SYMBOL(rpc_run_child);
+EXPORT_SYMBOL(rpciod_wake_up);
 EXPORT_SYMBOL(rpciod_down);
 EXPORT_SYMBOL(rpciod_up);
 
@@ -41,7 +44,8 @@
 EXPORT_SYMBOL(rpc_destroy_client);
 EXPORT_SYMBOL(rpc_shutdown_client);
 EXPORT_SYMBOL(rpc_killall_tasks);
-EXPORT_SYMBOL(rpc_do_call);
+EXPORT_SYMBOL(rpc_call_sync);
+EXPORT_SYMBOL(rpc_call_async);
 EXPORT_SYMBOL(rpc_call_setup);
 EXPORT_SYMBOL(rpc_clnt_sigmask);
 EXPORT_SYMBOL(rpc_clnt_sigunmask);
@@ -60,7 +64,9 @@
 EXPORT_SYMBOL(rpcauth_free_credcache);
 EXPORT_SYMBOL(rpcauth_insert_credcache);
 EXPORT_SYMBOL(rpcauth_lookupcred);
+EXPORT_SYMBOL(rpcauth_bindcred);
 EXPORT_SYMBOL(rpcauth_matchcred);
+EXPORT_SYMBOL(rpcauth_holdcred);
 EXPORT_SYMBOL(rpcauth_releasecred);
 
 /* RPC server stuff */
@@ -93,6 +99,9 @@
 EXPORT_SYMBOL(xdr_encode_netobj);
 EXPORT_SYMBOL(xdr_zero);
 EXPORT_SYMBOL(xdr_one);
+EXPORT_SYMBOL(xdr_two);
+EXPORT_SYMBOL(xdr_shift_iovec);
+EXPORT_SYMBOL(xdr_zero_iovec);
 
 /* RPC errors */
 EXPORT_SYMBOL(rpc_success);
--- linux/net/sunrpc/svc.c.nfsattack-gafton	Tue Jan  4 13:12:27 2000
+++ linux/net/sunrpc/svc.c	Fri Feb  4 23:26:34 2000
@@ -30,6 +30,9 @@
 svc_create(struct svc_program *prog, unsigned int bufsize, unsigned int xdrsize)
 {
 	struct svc_serv	*serv;
+#ifdef RPC_DEBUG
+	rpc_register_sysctl();
+#endif
 
 	xdr_init();
 
--- linux/net/sunrpc/sysctl.c.nfsattack-gafton	Thu Jan  7 12:28:47 1999
+++ linux/net/sunrpc/sysctl.c	Fri Feb  4 23:26:34 2000
@@ -99,9 +99,8 @@
 			left--, p++;
 		*(unsigned int *) table->data = value;
 		/* Display the RPC tasks on writing to rpc_debug */
-		if (table->ctl_name == CTL_RPCDEBUG) {
+		if (table->ctl_name == CTL_RPCDEBUG)
 			rpc_show_tasks();
-		}
 	} else {
 		if (!access_ok(VERIFY_WRITE, buffer, left))
 			return -EFAULT;
--- linux/net/sunrpc/xdr.c.nfsattack-gafton	Mon Apr  7 14:35:33 1997
+++ linux/net/sunrpc/xdr.c	Fri Feb  4 23:26:34 2000
@@ -9,6 +9,7 @@
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/string.h>
+#include <linux/kernel.h>
 #include <linux/in.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/msg_prot.h>
@@ -84,11 +85,13 @@
 }
 
 u32 *
-xdr_encode_string(u32 *p, const char *string)
+xdr_encode_string(u32 *p, const char *string, int len)
 {
-	int len = strlen(string);
-	int quadlen = XDR_QUADLEN(len);
+	int quadlen;
 
+	if (len < 0)
+		len = strlen(string);
+	quadlen = XDR_QUADLEN(len);
 	p[quadlen] = 0;
 	*p++ = htonl(len);
 	memcpy(p, string, len);
@@ -116,3 +119,56 @@
 	return p + XDR_QUADLEN(len);
 }
 
+/*
+ * Realign the iovec by 'len' bytes if the server left out some reply
+ * elements (such as post-op attributes, ...).
+ * Note: This is a simple implementation that requires len <= iov_len for
+ *       every element; otherwise it logs and returns early, leaving the
+ *       elements already visited shifted. The RPC header (assumed to be
+ *       the 1st element in the iov array) is not shifted.
+ */
+void xdr_shift_iovec(struct iovec *iov, int nr, size_t len)
+{
+	struct iovec *pvec;
+
+	if (nr <= 0)
+		return;
+	for (pvec = iov + nr - 1; pvec != iov; pvec--) {
+		struct iovec *svec = pvec - 1;
+
+		if (len > pvec->iov_len) {
+			printk(KERN_DEBUG "RPC: Urk! Large shift of short iovec.\n");
+			return;
+		}
+		memmove((u8 *)pvec->iov_base + len, pvec->iov_base,
+			pvec->iov_len - len);
+
+		if (len > svec->iov_len) {
+			printk(KERN_DEBUG "RPC: Urk! Large shift of short iovec.\n");
+			return;
+		}
+		memcpy(pvec->iov_base,
+		       (u8 *)svec->iov_base + svec->iov_len - len, len);
+	}
+}
+
+/*
+ * Zero the last n bytes in an iovec array of 'nr' elements, working
+ * backwards from the final element; each step clears at most that
+ * element's own iov_len bytes. Excess beyond the total is ignored.
+ */
+void xdr_zero_iovec(struct iovec *iov, int nr, size_t n)
+{
+	struct iovec *pvec;
+
+	if (nr <= 0)
+		return;
+	for (pvec = iov + nr - 1; n != 0; pvec--) {
+		size_t chunk = n < pvec->iov_len ? n : pvec->iov_len;
+
+		memset((char *)pvec->iov_base + pvec->iov_len - chunk, 0, chunk);
+		n -= chunk;
+		if (pvec == iov)
+			break;
+	}
+}
--- linux/net/sunrpc/xprt.c.nfsattack-gafton	Tue Jan  4 13:12:27 2000
+++ linux/net/sunrpc/xprt.c	Fri Feb  4 23:26:34 2000
@@ -31,7 +31,7 @@
  *  primitives that `transparently' work for processes as well as async
  *  tasks that rely on callbacks.
  *
- *  Copyright (C) 1995, 1996, Olaf Kirch <okir@monad.swb.de>
+ *  Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de>
  *
  *  TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com>
  *  TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com>
@@ -59,14 +59,18 @@
 
 #include <asm/uaccess.h>
 
-#define SOCK_HAS_USER_DATA
+/* Following value should be > 32k + RPC overhead */
+#define XPRT_MIN_WRITE_SPACE 35000
+
+extern spinlock_t rpc_queue_lock;
 
 /*
  * Local variables
  */
-#ifndef SOCK_HAS_USER_DATA
-static struct rpc_xprt *	sock_list = NULL;
-#endif
+
+/* Spinlock for critical sections in the code. */
+spinlock_t xprt_sock_lock = SPIN_LOCK_UNLOCKED;
+spinlock_t xprt_lock = SPIN_LOCK_UNLOCKED;
 
 #ifdef RPC_DEBUG
 # undef  RPC_DEBUG_DATA
@@ -82,10 +86,11 @@
  * Local functions
  */
 static void	xprt_request_init(struct rpc_task *, struct rpc_xprt *);
+static void	do_xprt_transmit(struct rpc_task *);
 static void	xprt_transmit_status(struct rpc_task *task);
 static void	xprt_receive_status(struct rpc_task *task);
 static void	xprt_reserve_status(struct rpc_task *task);
-static void	xprt_reconn_timeout(struct rpc_task *task);
+static void	xprt_disconnect(struct rpc_xprt *);
 static void	xprt_reconn_status(struct rpc_task *task);
 static struct socket *xprt_create_socket(int, struct sockaddr_in *,
 					struct rpc_timeout *);
@@ -127,54 +132,42 @@
 static inline struct rpc_xprt *
 xprt_from_sock(struct sock *sk)
 {
-#ifndef SOCK_HAS_USER_DATA
-	struct rpc_xprt		*xprt;
-
-	for (xprt = sock_list; xprt && sk != xprt->inet; xprt = xprt->link)
-		;
-	return xprt;
-#else
 	return (struct rpc_xprt *) sk->user_data;
-#endif
 }
 
 /*
  *	Adjust the iovec to move on 'n' bytes
  */
  
-extern inline void xprt_move_iov(struct msghdr *msg, struct iovec *niv, int amount)
+extern inline void
+xprt_move_iov(struct msghdr *msg, struct iovec *niv, int amount)
 {
 	struct iovec *iv=msg->msg_iov;
+	int i;
 	
 	/*
 	 *	Eat any sent iovecs
 	 */
-
-	while(iv->iov_len < amount)
-	{
-		amount-=iv->iov_len;
+	while(iv->iov_len <= amount) {
+		amount -= iv->iov_len;
 		iv++;
 		msg->msg_iovlen--;
 	}
-	
-	msg->msg_iov=niv;
-	
+
 	/*
 	 *	And chew down the partial one
 	 */
-
 	niv[0].iov_len = iv->iov_len-amount;
 	niv[0].iov_base =((unsigned char *)iv->iov_base)+amount;
 	iv++;
-	
+
 	/*
 	 *	And copy any others
 	 */
-	 
-	for(amount=1;amount<msg->msg_iovlen; amount++)
-	{
-		niv[amount]=*iv++;
-	}
+	for(i = 1; i < msg->msg_iovlen; i++)
+		niv[i]=*iv++;
+
+	msg->msg_iov=niv;
 }
  
 /*
@@ -182,43 +175,45 @@
  */
 
 static inline int
-xprt_sendmsg(struct rpc_xprt *xprt)
+xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
 {
 	struct socket	*sock = xprt->sock;
 	struct msghdr	msg;
 	mm_segment_t	oldfs;
 	int		result;
+	int		slen = req->rq_slen - req->rq_bytes_sent;
 	struct iovec	niv[MAX_IOVEC];
 
+	if (slen == 0)
+		return 0;
+
+	if (!sock)
+		return -ENOTCONN;
+
 	xprt_pktdump("packet data:",
-				xprt->snd_buf.io_vec->iov_base,
-				xprt->snd_buf.io_vec->iov_len);
+				req->rq_svec->iov_base,
+				req->rq_svec->iov_len);
 
-	msg.msg_flags   = MSG_DONTWAIT;
-	msg.msg_iov	= xprt->snd_buf.io_vec;
-	msg.msg_iovlen	= xprt->snd_buf.io_nr;
+	msg.msg_flags   = MSG_DONTWAIT|MSG_NOSIGNAL;
+	msg.msg_iov	= req->rq_svec;
+	msg.msg_iovlen	= req->rq_snr;
 	msg.msg_name	= (struct sockaddr *) &xprt->addr;
 	msg.msg_namelen = sizeof(xprt->addr);
 	msg.msg_control = NULL;
 	msg.msg_controllen = 0;
 
 	/* Dont repeat bytes */
-	
-	if(xprt->snd_sent)
-		xprt_move_iov(&msg, niv, xprt->snd_sent);
-		
+	if (req->rq_bytes_sent)
+		xprt_move_iov(&msg, niv, req->rq_bytes_sent);
+
 	oldfs = get_fs(); set_fs(get_ds());
-	result = sock_sendmsg(sock, &msg, xprt->snd_buf.io_len);
+	result = sock_sendmsg(sock, &msg, slen);
 	set_fs(oldfs);
 
-	dprintk("RPC:      xprt_sendmsg(%d) = %d\n",
-				xprt->snd_buf.io_len, result);
+	dprintk("RPC:      xprt_sendmsg(%d) = %d\n", slen, result);
 
-	if (result >= 0) {
-		xprt->snd_buf.io_len -= result;
-		xprt->snd_sent += result;
+	if (result >= 0)
 		return result;
-	}
 
 	switch (result) {
 	case -ECONNREFUSED:
@@ -227,9 +222,14 @@
 		 */
 		break;
 	case -EAGAIN:
-		return 0;
-	case -ENOTCONN: case -EPIPE:
+		if (sock->flags & SO_NOSPACE)
+			result = -ENOMEM;
+		break;
+	case -ENOTCONN:
+	case -EPIPE:
 		/* connection broken */
+		if (xprt->stream)
+			result = -ENOTCONN;
 		break;
 	default:
 		printk(KERN_NOTICE "RPC: sendmsg returned error %d\n", -result);
@@ -250,21 +250,10 @@
 	mm_segment_t	oldfs;
 	int		result;
 
-#if LINUX_VERSION_CODE >= 0x020100
-	msg.msg_flags   = MSG_DONTWAIT;
-	msg.msg_iov	= iov;
-	msg.msg_iovlen	= nr;
-	msg.msg_name	= &sin;
-	msg.msg_namelen = sizeof(sin);
-	msg.msg_control = NULL;
-	msg.msg_controllen = 0;
+	if (!sock)
+		return -ENOTCONN;
 
-	oldfs = get_fs(); set_fs(get_ds());
-	result = sock_recvmsg(sock, &msg, len, MSG_DONTWAIT);
-	set_fs(oldfs);
-#else
-	int		alen = sizeof(sin);
-	msg.msg_flags   = 0;
+	msg.msg_flags   = MSG_DONTWAIT|MSG_NOSIGNAL;
 	msg.msg_iov	= iov;
 	msg.msg_iovlen	= nr;
 	msg.msg_name	= &sin;
@@ -273,9 +262,8 @@
 	msg.msg_controllen = 0;
 
 	oldfs = get_fs(); set_fs(get_ds());
-	result = sock->ops->recvmsg(sock, &msg, len, 1, 0, &alen);
+	result = sock_recvmsg(sock, &msg, len, MSG_DONTWAIT);
 	set_fs(oldfs);
-#endif
 
 	dprintk("RPC:      xprt_recvmsg(iov %p, len %d) = %d\n",
 						iov, len, result);
@@ -327,21 +315,30 @@
 int
 xprt_adjust_timeout(struct rpc_timeout *to)
 {
-	if (to->to_exponential)
-		to->to_current <<= 1;
-	else
-		to->to_current += to->to_increment;
-	if (to->to_maxval && to->to_current >= to->to_maxval) {
-		to->to_current = to->to_maxval;
-		to->to_retries = 0;
+	if (to->to_retries > 0) {
+		if (to->to_exponential)
+			to->to_current <<= 1;
+		else
+			to->to_current += to->to_increment;
+		if (to->to_maxval && to->to_current >= to->to_maxval)
+			to->to_current = to->to_maxval;
+	} else {
+		if (to->to_exponential)
+			to->to_initval <<= 1;
+		else
+			to->to_initval += to->to_increment;
+		if (to->to_maxval && to->to_initval >= to->to_maxval)
+			to->to_initval = to->to_maxval;
+		to->to_current = to->to_initval;
 	}
+
 	if (!to->to_current) {
 		printk(KERN_WARNING "xprt_adjust_timeout: to_current = 0!\n");
 		to->to_current = 5 * HZ;
 	}
 	pprintk("RPC: %lu %s\n", jiffies,
 			to->to_retries? "retrans" : "timeout");
-	return (to->to_retries)--;
+	return to->to_retries-- > 0;
 }
 
 /*
@@ -352,23 +349,26 @@
 {
 	struct sock	*sk = xprt->inet;
 
-#ifdef SOCK_HAS_USER_DATA
+	xprt_disconnect(xprt);
+	if (!sk)
+		return;
+
 	sk->user_data    = NULL;
-#endif
 	sk->data_ready   = xprt->old_data_ready;
 	sk->state_change = xprt->old_state_change;
 	sk->write_space  = xprt->old_write_space;
 
-	if (xprt->file)
-		fput(xprt->file);
-	else
-		sock_release(xprt->sock);
+	xprt->connected = 0;
+
+	sock_release(xprt->sock);
 	/*
 	 *	TCP doesnt require the rpciod now - other things may
 	 *	but rpciod handles that not us.
 	 */
 	if(xprt->stream && !xprt->connecting)
 		rpciod_down();
+	xprt->inet = NULL;
+	xprt->sock = NULL;
 }
 
 /*
@@ -378,9 +378,19 @@
 xprt_disconnect(struct rpc_xprt *xprt)
 {
 	dprintk("RPC:      disconnected transport %p\n", xprt);
+	xprt->tcp_offset = 0;
+	xprt->tcp_more = 0;
+	xprt->tcp_total = 0;
+	xprt->tcp_reclen = 0;
+	xprt->tcp_copied = 0;
+	if (xprt->tcp_rqstp) {
+		struct rpc_task *task = xprt->tcp_rqstp->rq_task;
+		task->tk_status = -EIO;
+		rpc_wake_up_task(task);
+		xprt->tcp_rqstp  = NULL;
+	}
+	xprt->rx_pending_flag = 0;
 	rpc_wake_up_status(&xprt->pending, -ENOTCONN);
-	rpc_wake_up_status(&xprt->sending, -ENOTCONN);
-	xprt->connected = 0;
 }
 
 /*
@@ -392,107 +402,100 @@
 	struct rpc_xprt	*xprt = task->tk_xprt;
 	struct socket	*sock;
 	struct sock	*inet;
+	unsigned long	oldflags;
 	int		status;
 
 	dprintk("RPC: %4d xprt_reconnect %p connected %d\n",
 				task->tk_pid, xprt, xprt->connected);
 	task->tk_status = 0;
 
+	if (xprt->shutdown)
+		return;
+
+	if (!xprt->stream)
+		return;
+
+	spin_lock_irqsave(&xprt_sock_lock, oldflags);
+	if (xprt->connected) {
+		spin_unlock_irqrestore(&xprt_sock_lock, oldflags);
+		return;
+	}
 	if (xprt->connecting) {
-		task->tk_timeout = xprt->timeout.to_maxval;
+		task->tk_timeout = 0;
 		rpc_sleep_on(&xprt->reconn, task, NULL, NULL);
+		spin_unlock_irqrestore(&xprt_sock_lock, oldflags);
 		return;
 	}
 	xprt->connecting = 1;
+	spin_unlock_irqrestore(&xprt_sock_lock, oldflags);
 
 	/* Create an unconnected socket */
-	if (!(sock = xprt_create_socket(xprt->prot, NULL, &xprt->timeout)))
+	if (!(sock = xprt_create_socket(xprt->prot, NULL, &xprt->timeout))) {
+		xprt->connecting = 0;
 		goto defer;
+	}
 
-#if LINUX_VERSION_CODE >= 0x020100
 	inet = sock->sk;
-#else
-	inet = (struct sock *) sock->data;
-#endif
 	inet->data_ready   = xprt->inet->data_ready;
 	inet->state_change = xprt->inet->state_change;
 	inet->write_space  = xprt->inet->write_space;
-#ifdef SOCK_HAS_USER_DATA
 	inet->user_data    = xprt;
-#endif
 
 	dprintk("RPC: %4d closing old socket\n", task->tk_pid);
-	xprt_disconnect(xprt);
 	xprt_close(xprt);
 
-	/* Reset to new socket and default congestion */
+	/* Reset to new socket */
 	xprt->sock = sock;
 	xprt->inet = inet;
-	xprt->cwnd = RPC_INITCWND;
 
 	/* Now connect it asynchronously. */
 	dprintk("RPC: %4d connecting new socket\n", task->tk_pid);
 	status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
 				sizeof(xprt->addr), O_NONBLOCK);
+
 	if (status < 0) {
 		if (status != -EINPROGRESS && status != -EALREADY) {
 			printk("RPC: TCP connect error %d!\n", -status);
+			xprt->connecting = 0;
 			goto defer;
 		}
 
 		dprintk("RPC: %4d connect status %d connected %d\n",
 				task->tk_pid, status, xprt->connected);
-		task->tk_timeout = 60 * HZ;
 
-		start_bh_atomic();
+		spin_lock_irqsave(&xprt_sock_lock, oldflags);
 		if (!xprt->connected) {
-			rpc_sleep_on(&xprt->reconn, task,
-				xprt_reconn_status, xprt_reconn_timeout);
-			end_bh_atomic();
+			task->tk_timeout = xprt->timeout.to_maxval;
+			rpc_sleep_on(&xprt->reconn, task, xprt_reconn_status, NULL);
+			spin_unlock_irqrestore(&xprt_sock_lock, oldflags);
 			return;
 		}
-		end_bh_atomic();
+		spin_unlock_irqrestore(&xprt_sock_lock, oldflags);
 	}
 
-	xprt->connecting = 0;
-	rpc_wake_up(&xprt->reconn);
-	return;
 
 defer:
-	task->tk_timeout = 30 * HZ;
-	rpc_sleep_on(&xprt->reconn, task, NULL, NULL);
-	xprt->connecting = 0;
+	rpc_wake_up(&xprt->reconn);
 }
 
 /*
- * Reconnect status
+ * Reconnect status callback: mark the transport as no longer reconnecting,
+ * wake the xprt->reconn waiters, and leave the rest to the upper layers.
  */
 static void
 xprt_reconn_status(struct rpc_task *task)
 {
 	struct rpc_xprt	*xprt = task->tk_xprt;
+	unsigned long	oldflags;
 
-	dprintk("RPC: %4d xprt_reconn_status %d\n",
-				task->tk_pid, task->tk_status);
-	if (!xprt->connected && task->tk_status != -ETIMEDOUT) {
-		task->tk_timeout = 30 * HZ;
-		rpc_sleep_on(&xprt->reconn, task, NULL, xprt_reconn_timeout);
-	}
-}
-
-/*
- * Reconnect timeout. We just mark the transport as not being in the
- * process of reconnecting, and leave the rest to the upper layers.
- */
-static void
-xprt_reconn_timeout(struct rpc_task *task)
-{
 	dprintk("RPC: %4d xprt_reconn_timeout %d\n",
 				task->tk_pid, task->tk_status);
-	task->tk_status = -ENOTCONN;
-	task->tk_xprt->connecting = 0;
-	task->tk_timeout = 0;
-	rpc_wake_up_task(task);
+
+	spin_lock_irqsave(&xprt_sock_lock, oldflags);
+	if (xprt->connecting)
+		xprt->connecting = 0;
+	spin_unlock_irqrestore(&xprt_sock_lock, oldflags);
+	rpc_wake_up(&xprt->reconn);
 }
 
 /*
@@ -509,16 +512,19 @@
 		task = head;
 		do {
 			if ((req = task->tk_rqstp) && req->rq_xid == xid)
-				return req;
+				goto out;
 			task = task->tk_next;
 			if (++safe > 100) {
 				printk("xprt_lookup_rqst: loop in Q!\n");
-				return NULL;
+				goto out_bad;
 			}
 		} while (task != head);
 	}
 	dprintk("RPC:      unknown XID %08x in reply.\n", xid);
-	return NULL;
+ out_bad:
+	req = NULL;
+ out:
+	return req;
 }
 
 /*
@@ -531,7 +537,6 @@
 	struct rpc_task	*task = req->rq_task;
 
 	req->rq_rlen   = copied;
-	req->rq_gotit  = 1;
 
 	/* Adjust congestion window */
 	xprt_adjust_cwnd(xprt, copied);
@@ -575,43 +580,62 @@
 	struct rpc_rqst *rovr;
 	struct sk_buff	*skb;
 	struct iovec	iov[MAX_IOVEC];
+	unsigned long	oldflags;
 	int		err, repsize, copied;
 
 	dprintk("RPC:      udp_data_ready...\n");
-	if (!(xprt = xprt_from_sock(sk)))
+	if (!(xprt = xprt_from_sock(sk))) {
+		printk("RPC:      udp_data_ready request not found!\n");
 		return;
+	}
+
 	dprintk("RPC:      udp_data_ready client %p\n", xprt);
 
 	if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL)
-		return;
-	repsize = skb->len - 8;	/* don't account for UDP header */
+		goto out_err;
 
+	repsize = skb->len - sizeof(struct udphdr);
 	if (repsize < 4) {
 		printk("RPC: impossible RPC reply size %d!\n", repsize);
 		goto dropit;
 	}
 
+	/* Lock the rpc_queue to prevent task from disappearing beneath us */
+	spin_lock_irqsave(&rpc_queue_lock, oldflags);
 	/* Look up the request corresponding to the given XID */
-	if (!(rovr = xprt_lookup_rqst(xprt, *(u32 *) (skb->h.raw + 8))))
+	rovr = xprt_lookup_rqst(xprt, *(u32 *) (skb->h.raw + sizeof(struct udphdr)));
+	if (!rovr) {
+		spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
 		goto dropit;
+	}
 	task = rovr->rq_task;
+	if (!rpc_lock_task(task)) {
+		spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+		goto dropit;
+	}
+	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
 
 	dprintk("RPC: %4d received reply\n", task->tk_pid);
-	xprt_pktdump("packet data:", (u32 *) (skb->h.raw+8), repsize);
+	xprt_pktdump("packet data:",
+		     (u32 *) (skb->h.raw+sizeof(struct udphdr)), repsize);
 
 	if ((copied = rovr->rq_rlen) > repsize)
 		copied = repsize;
 
 	/* Okay, we have it. Copy datagram... */
+	rovr->rq_damaged  = 1;
 	memcpy(iov, rovr->rq_rvec, rovr->rq_rnr * sizeof(iov[0]));
 	/* This needs to stay tied with the usermode skb_copy_dagram... */
 	memcpy_tokerneliovec(iov, skb->data+8, copied);
 
 	xprt_complete_rqst(xprt, rovr, copied);
+	rpc_unlock_task(task);
 
-dropit:
+ dropit:
 	skb_free_datagram(sk, skb);
-	return;
+ out_err:
+	if (xprt->old_data_ready)
+		xprt->old_data_ready(sk, 0);
 }
 
 /*
@@ -629,10 +653,12 @@
 	struct rpc_rqst	*req;
 	struct iovec	*iov;
 	struct iovec	riov;
+	unsigned long	oldflags;
 	u32		offset;
 	int		result, maxcpy, reclen, avail, want;
 
 	dprintk("RPC:      tcp_input_record\n");
+
 	offset = xprt->tcp_offset;
 	result = -EAGAIN;
 	if (offset < 4 || (!xprt->tcp_more && offset < 8)) {
@@ -641,11 +667,6 @@
 		riov.iov_base = xprt->tcp_recm.data + offset;
 		riov.iov_len  = want;
 		result = xprt_recvmsg(xprt, &riov, 1, want);
-		if (!result)
-		{
-			dprintk("RPC: empty TCP record.\n");
-			return -ENOTCONN;
-		}
 		if (result < 0)
 			goto done;
 		offset += result;
@@ -664,7 +685,8 @@
 
 		dprintk("RPC:      got xid %08x reclen %d morefrags %d\n",
 			xprt->tcp_xid, xprt->tcp_reclen, xprt->tcp_more);
-		if (!xprt->tcp_copied
+		spin_lock_irqsave(&rpc_queue_lock, oldflags);
+		if (!xprt->tcp_copied && !xprt->tcp_rqstp
 		 && (req = xprt_lookup_rqst(xprt, xprt->tcp_xid))) {
 			iov = xprt->tcp_iovec;
 			memcpy(iov, req->rq_rvec, req->rq_rnr * sizeof(iov[0]));
@@ -676,27 +698,47 @@
 			xprt->tcp_copied = 4;
 			xprt->tcp_rqstp  = req;
 		}
+		spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
 	} else {
 		reclen = xprt->tcp_reclen;
 	}
 
 	avail = reclen - (offset - 4);
+	spin_lock_irqsave(&rpc_queue_lock, oldflags);
 	if ((req = xprt->tcp_rqstp) && req->rq_xid == xprt->tcp_xid
 	 && req->rq_task->tk_rpcwait == &xprt->pending) {
+		struct rpc_task *task = req->rq_task;
+
+		/* Can we get a lock on the task ? */
+		if (!rpc_lock_task(task)) {
+			/* No, so we discard and wake up */
+			task->tk_status = -EAGAIN;
+			spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+			if (!RPC_IS_RUNNING(task))
+				rpc_wake_up_task(task);
+			goto discard;
+		}
+
 		want = MIN(req->rq_rlen - xprt->tcp_copied, avail);
 
 		dprintk("RPC: %4d TCP receiving %d bytes\n",
 					req->rq_task->tk_pid, want);
+		/* Request must be re-encoded before retransmit */
+		req->rq_damaged = 1;
+
+		spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
 		result = xprt_recvmsg(xprt, xprt->tcp_iovec, req->rq_rnr, want);
-		if (!result && want)
-			result = -EAGAIN;
-		if (result < 0)
+
+		if (result < 0) {
+			rpc_unlock_task(task);
 			goto done;
+		}
 		xprt->tcp_copied += result;
 		offset += result;
 		avail  -= result;
 		if (result < want) {
 			result = -EAGAIN;
+			rpc_unlock_task(task);
 			goto done;
 		}
 
@@ -708,12 +750,13 @@
 			xprt->tcp_copied = 0;
 			xprt->tcp_rqstp  = NULL;
 		}
-		/* Request must be re-encoded before retransmit */
-		req->rq_damaged = 1;
-	}
+		rpc_unlock_task(task);
+	} else
+		spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
 
+ discard:
 	/* Skip over any trailing bytes on short reads */
-	while (avail) {
+	while (avail > 0) {
 		static u8	dummy[64];
 
 		want = MIN(avail, sizeof(dummy));
@@ -721,8 +764,6 @@
 		riov.iov_len  = want;
 		dprintk("RPC:      TCP skipping %d bytes\n", want);
 		result = xprt_recvmsg(xprt, &riov, 1, want);
-		if (!result && want)
-			result=-EAGAIN;
 		if (result < 0)
 			goto done;
 		offset += result;
@@ -743,55 +784,44 @@
 	return result;
 }
 
-static __inline__ void tcp_output_record(struct rpc_xprt *xprt)
-{
-	if(xprt->snd_sent && xprt->snd_task)
-		dprintk("RPC: write space\n");
-	if(xprt->write_space == 0)
-	{
-		xprt->write_space = 1;
-		if (xprt->snd_task && !RPC_IS_RUNNING(xprt->snd_task))
-		{
-			if(xprt->snd_sent)
-				dprintk("RPC: Write wakeup snd_sent =%d\n",
-					xprt->snd_sent);
-			rpc_wake_up_task(xprt->snd_task);			
-		}
-	}
-}
-
 /*
  *	TCP task queue stuff
  */
  
-static struct rpc_xprt *rpc_rx_xprt_pending = NULL;	/* Chain by rx_pending of rpc_xprt's */
-static struct rpc_xprt *rpc_tx_xprt_pending = NULL;	/* Chain by tx_pending of rpc_xprt's */
+static struct rpc_xprt *rpc_xprt_pending = NULL;	/* Chain by rx_pending of rpc_xprt's */
 
 /*
  *	This is protected from tcp_data_ready and the stack as its run
  *	inside of the RPC I/O daemon
  */
-
-void rpciod_tcp_dispatcher(void)
+void
+rpciod_tcp_dispatcher(void)
 {
 	struct rpc_xprt *xprt;
+	unsigned long	oldflags;
 	int result;
 
 	dprintk("rpciod_tcp_dispatcher: Queue Running\n");
-	
+
 	/*
 	 *	Empty each pending socket
 	 */
-	 
-	while((xprt=rpc_rx_xprt_pending)!=NULL)
-	{
+ 
+	while(1) {
 		int safe_retry=0;
-		
-		rpc_rx_xprt_pending=xprt->rx_pending;
-		xprt->rx_pending_flag=0;
-		
+
+		spin_lock_irqsave(&rpc_queue_lock, oldflags);
+		if ((xprt = rpc_xprt_pending) == NULL) {
+			spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+			break;
+		}
+		xprt->rx_pending_flag = 0;
+		rpc_xprt_pending=xprt->rx_pending;
+		xprt->rx_pending = NULL;
+		spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+
 		dprintk("rpciod_tcp_dispatcher: Processing %p\n", xprt);
-		
+
 		do 
 		{
 			if (safe_retry++ > 50)
@@ -799,28 +829,23 @@
 			result = tcp_input_record(xprt);
 		}
 		while (result >= 0);
-	
+
 		switch (result) {
 			case -EAGAIN:
-				continue;
 			case -ENOTCONN:
 			case -EPIPE:
-				xprt_disconnect(xprt);
 				continue;
 			default:
 				printk(KERN_WARNING "RPC: unexpected error %d from tcp_input_record\n",
 					result);
 		}
 	}
-
-	while((xprt=rpc_tx_xprt_pending)!=NULL)
-	{
-		rpc_tx_xprt_pending = xprt->tx_pending;
-		xprt->tx_pending_flag = 0;
-		tcp_output_record(xprt);
-	}
 }
 
+int xprt_tcp_pending(void)
+{
+	return rpc_xprt_pending != NULL;
+}
 
 extern inline void tcp_rpciod_queue(void)
 {
@@ -837,6 +862,7 @@
 static void tcp_data_ready(struct sock *sk, int len)
 {
 	struct rpc_xprt	*xprt;
+	unsigned long	oldflags;
 
 	dprintk("RPC:      tcp_data_ready...\n");
 	if (!(xprt = xprt_from_sock(sk)))
@@ -844,32 +870,26 @@
 		printk("Not a socket with xprt %p\n", sk);
 		return;
 	}
+
 	dprintk("RPC:      tcp_data_ready client %p\n", xprt);
 	dprintk("RPC:      state %x conn %d dead %d zapped %d\n",
 				sk->state, xprt->connected,
 				sk->dead, sk->zapped);
-	/*
-	 *	If we are not waiting for the RPC bh run then
-	 *	we are now
-	 */
-	if (!xprt->rx_pending_flag)
-	{
-		int start_queue=0;
 
-		dprintk("RPC:     xprt queue %p\n", rpc_rx_xprt_pending);
-		if(rpc_rx_xprt_pending==NULL)
-			start_queue=1;
+	spin_lock_irqsave(&rpc_queue_lock, oldflags);
+	if (!xprt->rx_pending_flag) {
+		dprintk("RPC:     xprt queue %p\n", rpc_xprt_pending);
+
+		xprt->rx_pending=rpc_xprt_pending;
+		rpc_xprt_pending=xprt;
 		xprt->rx_pending_flag=1;
-		xprt->rx_pending=rpc_rx_xprt_pending;
-		rpc_rx_xprt_pending=xprt;
-		if (start_queue)
-		  {
-		    tcp_rpciod_queue();
-		    start_queue=0;
-		  }
-	}
-	else
+	} else
 		dprintk("RPC:     xprt queued already %p\n", xprt);
+	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+	tcp_rpciod_queue();
+
+	if (xprt->old_data_ready)
+		xprt->old_data_ready(sk, 0);
 }
 
 
@@ -877,6 +897,7 @@
 tcp_state_change(struct sock *sk)
 {
 	struct rpc_xprt	*xprt;
+	unsigned long oldflags;
 
 	if (!(xprt = xprt_from_sock(sk)))
 		return;
@@ -885,35 +906,95 @@
 				sk->state, xprt->connected,
 				sk->dead, sk->zapped);
 
-	if (sk->state == TCP_ESTABLISHED && !xprt->connected) {
+	spin_lock_irqsave(&xprt_sock_lock, oldflags);
+	switch(sk->state) {
+	case TCP_ESTABLISHED:
+	case TCP_LISTEN:
 		xprt->connected = 1;
 		xprt->connecting = 0;
+		spin_unlock_irqrestore(&xprt_sock_lock, oldflags);
 		rpc_wake_up(&xprt->reconn);
-	} else if (sk->zapped) {
+		rpc_wake_up_next(&xprt->sending);
+		tcp_rpciod_queue();
+		break;
+	default:
+		xprt->connected = 0;
+		if (xprt->connecting) {
+			spin_unlock_irqrestore(&xprt_sock_lock, oldflags);
+			break;
+		}
+		spin_unlock_irqrestore(&xprt_sock_lock, oldflags);
 		rpc_wake_up_status(&xprt->pending, -ENOTCONN);
-		rpc_wake_up_status(&xprt->sending, -ENOTCONN);
 		rpc_wake_up_status(&xprt->reconn,  -ENOTCONN);
+		break;
 	}
+	if (xprt->old_state_change)
+		xprt->old_state_change(sk);
 }
 
+/*
+ * The following 2 routines wake up a task that went to sleep because socket
+ * memory was low, once enough write space has become available again.
+ */
 static void
 tcp_write_space(struct sock *sk)
 {
 	struct rpc_xprt	*xprt;
+	unsigned long	oldflags;
 
 	if (!(xprt = xprt_from_sock(sk)))
 		return;
-	if (!xprt->tx_pending_flag) {
-		int start_queue = 0;
 
-		if (rpc_tx_xprt_pending == NULL)
-			start_queue = 1;
-		xprt->tx_pending_flag = 1;
-		xprt->tx_pending = rpc_tx_xprt_pending;
-		rpc_tx_xprt_pending = xprt;
-		if (start_queue)
-			tcp_rpciod_queue();
-	}
+	/* Wait until we have enough socket memory */
+	if (sock_wspace(sk) < min(sk->sndbuf,XPRT_MIN_WRITE_SPACE))
+		goto out;
+
+	spin_lock_irqsave(&xprt_sock_lock, oldflags);
+	if (xprt->write_space)
+		goto out_unlock;
+
+	xprt->write_space = 1;
+
+	if (!xprt->snd_task)
+		rpc_wake_up_next(&xprt->sending);
+	else if (xprt->snd_task->tk_rpcwait == &xprt->sending)
+		rpc_wake_up_task(xprt->snd_task);
+ out_unlock:
+	spin_unlock_irqrestore(&xprt_sock_lock, oldflags);
+ out:
+	if (xprt->old_write_space)
+		xprt->old_write_space(sk);
+}
+
+static void
+udp_write_space(struct sock *sk)
+{
+	struct rpc_xprt *xprt;
+	unsigned long	oldflags;
+
+	if (!(xprt = xprt_from_sock(sk)))
+		return;
+
+
+	/* Wait until we have enough socket memory */
+	if (sock_wspace(sk) < min(sk->sndbuf,XPRT_MIN_WRITE_SPACE))
+		goto out;
+
+	spin_lock_irqsave(&xprt_sock_lock, oldflags);
+	if (xprt->write_space)
+		goto out_unlock;
+
+	xprt->write_space = 1;
+
+	if (!xprt->snd_task)
+		rpc_wake_up_next(&xprt->sending);
+	else if (xprt->snd_task->tk_rpcwait == &xprt->sending)
+		rpc_wake_up_task(xprt->snd_task);
+ out_unlock:
+	spin_unlock_irqrestore(&xprt_sock_lock, oldflags);
+ out:
+	if (xprt->old_write_space)
+		xprt->old_write_space(sk);
 }
 
 /*
@@ -936,32 +1017,51 @@
 	rpc_wake_up_task(task);
 }
 
+
 /*
- * (Partly) transmit the RPC packet
- * Note that task->tk_status is either 0 or negative on return.
- * Only when the reply is received will the status be set to a
- * positive value.
+ * Serialize access to sockets, in order to prevent different
+ * requests from interfering with each other.
  */
-static inline int
-xprt_transmit_some(struct rpc_xprt *xprt, struct rpc_task *task)
+static int
+xprt_down_transmit(struct rpc_task *task)
 {
+	struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
 	struct rpc_rqst	*req = task->tk_rqstp;
-	int		result;
+	unsigned long	oldflags;
 
-	task->tk_status = 0;
-	if ((result = xprt_sendmsg(xprt)) >= 0) {
-		if (!xprt->snd_buf.io_len || !xprt->stream) {
-			rpc_wake_up_next(&xprt->sending);
-			return req->rq_slen;
-		}
-		result = -EAGAIN;
-	} else if (xprt->stream) {
-		if (result == -ENOTCONN || result == -EPIPE) {
-			xprt_disconnect(xprt);
-			result = -ENOTCONN;
-		}
+	spin_lock_irqsave(&xprt_sock_lock, oldflags);
+	if (xprt->snd_task && xprt->snd_task != task) {
+		dprintk("RPC: %4d TCP write queue full (task %d)\n",
+			task->tk_pid, xprt->snd_task->tk_pid);
+		task->tk_timeout = 0;
+		task->tk_status = -EAGAIN;
+		rpc_sleep_on(&xprt->sending, task, NULL, NULL);
+	} else if (!xprt->snd_task) {
+		xprt->snd_task = task;
+#ifdef RPC_PROFILE
+		req->rq_xtime = jiffies;
+#endif
+		req->rq_bytes_sent = 0;
+	}
+	spin_unlock_irqrestore(&xprt_sock_lock, oldflags);
+	return xprt->snd_task == task;
+}
+
+/*
+ * Releases the socket for use by other requests.
+ */
+static void
+xprt_up_transmit(struct rpc_task *task)
+{
+	struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
+	unsigned long	oldflags;
+
+	if (xprt->snd_task && xprt->snd_task == task) {
+		spin_lock_irqsave(&xprt_sock_lock, oldflags);
+		xprt->snd_task = NULL;
+		rpc_wake_up_next(&xprt->sending);
+		spin_unlock_irqrestore(&xprt_sock_lock, oldflags);
 	}
-	return task->tk_status = result;
 }
 
 /*
@@ -971,75 +1071,58 @@
 void
 xprt_transmit(struct rpc_task *task)
 {
-	struct rpc_timeout *timeo;
 	struct rpc_rqst	*req = task->tk_rqstp;
 	struct rpc_xprt	*xprt = req->rq_xprt;
-	int status;
 
 	dprintk("RPC: %4d xprt_transmit(%x)\n", task->tk_pid, 
 				*(u32 *)(req->rq_svec[0].iov_base));
 
-	if (xprt->shutdown) {
+	if (xprt->shutdown)
 		task->tk_status = -EIO;
+
+	if (task->tk_status < 0)
 		return;
-	}
 
-	/* If we're not already in the process of transmitting our call,
-	 * set up everything as needed. */
-	if (xprt->snd_task != task) {
-		/* Write the record marker */
-		if (xprt->stream) {
-			u32	marker;
+	/* set up everything as needed. */
+	/* Write the record marker */
+	if (xprt->stream) {
+		u32	marker;
 
-			if (!xprt->connected) {
-				task->tk_status = -ENOTCONN;
-				return;
-			}
-			marker = htonl(0x80000000|(req->rq_slen-4));
-			*((u32 *) req->rq_svec[0].iov_base) = marker;
-		}
+		marker = htonl(0x80000000|(req->rq_slen-4));
+		*((u32 *) req->rq_svec[0].iov_base) = marker;
 
-		/* Reset timeout parameters */
-		timeo = &req->rq_timeout;
-		if (timeo->to_retries < 0) {
-			dprintk("RPC: %4d xprt_transmit reset timeo\n",
-						task->tk_pid);
-			timeo->to_retries = xprt->timeout.to_retries;
-			timeo->to_current = timeo->to_initval;
-		}
+	}
 
-#ifdef RPC_PROFILE
-		req->rq_xtime = jiffies;
-#endif
-		req->rq_gotit = 0;
+	if (!xprt_down_transmit(task))
+		return;
 
-		if (xprt->snd_task) {
-			dprintk("RPC: %4d TCP write queue full (task %d)\n",
-					task->tk_pid, xprt->snd_task->tk_pid);
-			rpc_sleep_on(&xprt->sending, task,
-					xprt_transmit_status, NULL);
-			return;
-		}
-		xprt->snd_buf  = req->rq_snd_buf;
-		xprt->snd_task = task;
-		xprt->snd_sent = 0;
+	do_xprt_transmit(task);
+}
+
+static void
+do_xprt_transmit(struct rpc_task *task)
+{
+	struct rpc_rqst	*req = task->tk_rqstp;
+	struct rpc_xprt	*xprt = req->rq_xprt;
+	unsigned long	oldflags;
+	int status, retry = 0;
+
+	if (xprt->shutdown) {
+		task->tk_status = -EIO;
+		goto out_release;
 	}
 
+	if (task->tk_rpcwait)
+		rpc_remove_wait_queue(task);
+
 	/* For fast networks/servers we have to put the request on
 	 * the pending list now:
+	 * Note that we don't want the task timing out during the
+	 * call to xprt_sendmsg(), so we initially disable the timeout,
+	 * and then reset it later...
 	 */
-	start_bh_atomic();
-	status = rpc_add_wait_queue(&xprt->pending, task);
-	if (!status)
-		task->tk_callback = NULL;
-	end_bh_atomic();
-
-	if (status)
-	{
-		printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
-		task->tk_status = status;
-		return;
-	}
+	task->tk_timeout = 0;
+	xprt_receive(task);
 
 	/* Continue transmitting the packet/record. We must be careful
 	 * to cope with writespace callbacks arriving _after_ we have
@@ -1047,27 +1130,78 @@
 	 */
 	while (1) {
 		xprt->write_space = 0;
-		if (xprt_transmit_some(xprt, task) != -EAGAIN) {
+		status = xprt_sendmsg(xprt, req);
+
+		if (status < 0)
+			break;
+
+		if (xprt->stream) {
+			req->rq_bytes_sent += status;
+
+			if (req->rq_bytes_sent >= req->rq_slen)
+				goto out_receive;
+		}
+
+		if (status < req->rq_slen)
+			status = -EAGAIN;
+
+		if (status >= 0) {
 			dprintk("RPC: %4d xmit complete\n", task->tk_pid);
-			xprt->snd_task = NULL;
-			return;
+			goto out_receive;
 		}
 
-		/*d*/dprintk("RPC: %4d xmit incomplete (%d left of %d)\n",
-				task->tk_pid, xprt->snd_buf.io_len,
+		if (!xprt->stream)
+			break;
+
+		dprintk("RPC: %4d xmit incomplete (%d left of %d)\n",
+				task->tk_pid, req->rq_slen - req->rq_bytes_sent,
 				req->rq_slen);
-		task->tk_status = 0;
-		start_bh_atomic();
-		if (!xprt->write_space) {
-			/* Remove from pending */
-			rpc_remove_wait_queue(task);
-			rpc_sleep_on(&xprt->sending, task,
-					xprt_transmit_status, NULL);
-			end_bh_atomic();
-			return;
-		}
-		end_bh_atomic();
+
+		if (retry++ > 50)
+			break;
+	}
+	rpc_unlock_task(task);
+
+	task->tk_status = (status == -ENOMEM) ? -EAGAIN : status;
+
+	/* Note: at this point, task->tk_sleeping has not yet been set,
+	 *	 hence there is no danger of the woken-up task being put on
+	 *	 schedq, and being picked up by a parallel run of rpciod().
+	 */
+	rpc_wake_up_task(task);
+	if (!RPC_IS_RUNNING(task))
+		goto out_release;
+
+	task->tk_timeout = req->rq_timeout.to_current;
+
+	if (status == -ENOMEM || status == -EAGAIN) {
+		spin_lock_irqsave(&xprt_sock_lock, oldflags);
+		if (!xprt->write_space)
+			rpc_sleep_on(&xprt->sending, task, xprt_transmit_status,
+				     NULL);
+		spin_unlock_irqrestore(&xprt_sock_lock, oldflags);
+		return;
 	}
+
+	/*
+	 * If there's some socket problem sleep a bit first while still
+	 * holding the socket.
+	 */
+	if (status == -ECONNREFUSED || status == -ENOTCONN) {
+		rpc_sleep_on(&xprt->sending, task, xprt_transmit_status, NULL);
+		return;
+	}
+
+ out_release:
+	xprt_up_transmit(task);
+	return;
+ out_receive:
+	/* Reset the task timeout values */
+	task->tk_timeout = req->rq_timeout.to_current;
+	xprt_receive(task);
+	rpc_unlock_task(task);
+	rpc_unlock_task(task);
+	goto out_release;
 }
 
 /*
@@ -1080,16 +1214,8 @@
 	struct rpc_xprt	*xprt = task->tk_client->cl_xprt;
 
 	dprintk("RPC: %4d transmit_status %d\n", task->tk_pid, task->tk_status);
-	if (xprt->snd_task == task) 
-	{
-		if (task->tk_status < 0)
-		{
-			xprt->snd_task = NULL;
-			xprt_disconnect(xprt);
-		}
-		else
-			xprt_transmit(task);
-	}
+	if (xprt->snd_task == task)
+		xprt_up_transmit(task);
 }
 
 /*
@@ -1104,25 +1230,8 @@
 	struct rpc_xprt	*xprt = req->rq_xprt;
 
 	dprintk("RPC: %4d xprt_receive\n", task->tk_pid);
-	if (xprt->connected == 0) {
-		task->tk_status = -ENOTCONN;
-		return;
-	}
-
-	/*
-	 * Wait until rq_gotit goes non-null, or timeout elapsed.
-	 */
-	task->tk_timeout = req->rq_timeout.to_current;
-
-	start_bh_atomic();
-	if (!req->rq_gotit) {
-		rpc_sleep_on(&xprt->pending, task,
-				xprt_receive_status, xprt_timer);
-	}
-	end_bh_atomic();
 
-	dprintk("RPC: %4d xprt_receive returns %d\n",
-				task->tk_pid, task->tk_status);
+	rpc_sleep_locked(&xprt->pending, task, xprt_receive_status, xprt_timer);
 }
 
 static void
@@ -1130,7 +1239,7 @@
 {
 	struct rpc_xprt	*xprt = task->tk_xprt;
 
-	if (xprt->stream && xprt->tcp_rqstp == task->tk_rqstp)
+	if (xprt->tcp_rqstp == task->tk_rqstp)
 		xprt->tcp_rqstp = NULL;
 }
 
@@ -1148,7 +1257,7 @@
 
 	dprintk("RPC: %4d xprt_reserve cong = %ld cwnd = %ld\n",
 				task->tk_pid, xprt->cong, xprt->cwnd);
-	if ((!RPCXPRT_CONGESTED(xprt) && xprt->free)) {
+	if (!RPCXPRT_CONGESTED(xprt) && xprt->free) {
 		xprt_reserve_status(task);
 		task->tk_timeout = 0;
 	} else if (!task->tk_timeout) {
@@ -1177,40 +1286,30 @@
 		/* NOP */
 	} else if (task->tk_rqstp) {
 		/* We've already been given a request slot: NOP */
-	} else if (!RPCXPRT_CONGESTED(xprt)) {
+	} else if (!RPCXPRT_CONGESTED(xprt) && xprt->free) {
 		/* OK: There's room for us. Grab a free slot and bump
 		 * congestion value */
-		req = xprt->free;
-		if (!req)
-			goto bad_list;
-		if (req->rq_xid)
-			goto bad_used;
+		spin_lock(&xprt_lock);
+		if (!(req = xprt->free)) {
+			spin_unlock(&xprt_lock);
+			goto out_nofree;
+		}
 		xprt->free     = req->rq_next;
+		req->rq_next   = NULL;
+		spin_unlock(&xprt_lock);
 		xprt->cong    += RPC_CWNDSCALE;
 		task->tk_rqstp = req;
-		req->rq_next   = NULL;
 		xprt_request_init(task, xprt);
-	} else {
-		task->tk_status = -EAGAIN;
-	}
 
-	if (xprt->free && !RPCXPRT_CONGESTED(xprt))
-		rpc_wake_up_next(&xprt->backlog);
+		if (xprt->free)
+			xprt_clear_backlog(xprt);
+	} else
+		goto out_nofree;
 
 	return;
 
-bad_list:
-	printk(KERN_ERR 
-		"RPC: %4d inconsistent free list (cong %ld cwnd %ld)\n",
-		task->tk_pid, xprt->cong, xprt->cwnd);
-	rpc_debug = ~0;
-	goto bummer;
-bad_used:
-	printk(KERN_ERR "RPC: used rqst slot %p on free list!\n", req);
-bummer:
-	task->tk_status = -EIO;
-	xprt->free = NULL;
-	return;
+out_nofree:
+	task->tk_status = -EAGAIN;
 }
 
 /*
@@ -1227,7 +1326,6 @@
 
 	dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, req, xid);
 	task->tk_status = 0;
-	req->rq_gotit	= 0;
 	req->rq_timeout = xprt->timeout;
 	req->rq_task	= task;
 	req->rq_xprt    = xprt;
@@ -1252,46 +1350,24 @@
 
 	dprintk("RPC: %4d release request %p\n", task->tk_pid, req);
 
+	spin_lock(&xprt_lock);
+	req->rq_next = xprt->free;
+	xprt->free   = req;
+
 	/* remove slot from queue of pending */
-	start_bh_atomic();
 	if (task->tk_rpcwait) {
 		printk("RPC: task of released request still queued!\n");
 #ifdef RPC_DEBUG
 		printk("RPC: (task is on %s)\n", rpc_qname(task->tk_rpcwait));
 #endif
-		rpc_del_timer(task);
 		rpc_remove_wait_queue(task);
 	}
-	end_bh_atomic();
+	spin_unlock(&xprt_lock);
 
 	/* Decrease congestion value. */
 	xprt->cong -= RPC_CWNDSCALE;
 
-#if 0
-	/* If congestion threshold is not yet reached, pass on the request slot.
-	 * This looks kind of kludgy, but it guarantees backlogged requests
-	 * are served in order.
-	 * N.B. This doesn't look completely safe, as the task is still
-	 * on the backlog list after wake-up.
-	 */
-	if (!RPCXPRT_CONGESTED(xprt)) {
-		struct rpc_task	*next = rpc_wake_up_next(&xprt->backlog);
-
-		if (next && next->tk_rqstp == 0) {
-			xprt->cong += RPC_CWNDSCALE;
-			next->tk_rqstp = req;
-			xprt_request_init(next, xprt);
-			return;
-		}
-	}
-#endif
-
-	req->rq_next = xprt->free;
-	xprt->free   = req;
-
-	/* If not congested, wake up the next backlogged process */
-	if (!RPCXPRT_CONGESTED(xprt))
-		rpc_wake_up_next(&xprt->backlog);
+	xprt_clear_backlog(xprt);
 }
 
 /*
@@ -1336,38 +1412,31 @@
 	dprintk("RPC:      setting up %s transport...\n",
 				proto == IPPROTO_UDP? "UDP" : "TCP");
 
-#if LINUX_VERSION_CODE >= 0x020100
 	inet = sock->sk;
-#else
-	inet = (struct sock *) sock->data;
-#endif
 
 	if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL)
 		return NULL;
 	memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */
 
-	xprt->file = NULL;
 	xprt->sock = sock;
 	xprt->inet = inet;
 	xprt->addr = *ap;
 	xprt->prot = proto;
 	xprt->stream = (proto == IPPROTO_TCP)? 1 : 0;
-	xprt->cwnd = RPC_INITCWND;
-#ifdef SOCK_HAS_USER_DATA
+	xprt->congtime = jiffies;
 	inet->user_data = xprt;
-#else
-	xprt->link = sock_list;
-	sock_list = xprt;
-#endif
 	xprt->old_data_ready = inet->data_ready;
 	xprt->old_state_change = inet->state_change;
 	xprt->old_write_space = inet->write_space;
 	if (proto == IPPROTO_UDP) {
 		inet->data_ready = udp_data_ready;
+		inet->write_space = udp_write_space;
+		xprt->cwnd = RPC_INITCWND;
 	} else {
 		inet->data_ready = tcp_data_ready;
 		inet->state_change = tcp_state_change;
 		inet->write_space = tcp_write_space;
+		xprt->cwnd = RPC_MAXCWND;
 		xprt->nocong = 1;
 	}
 	xprt->connected = 1;
@@ -1403,39 +1472,6 @@
 }
 
 /*
- * Create and initialize an RPC client given an open file.
- * This is obsolete now.
- */
-#if 0
-struct rpc_xprt *
-xprt_create(struct file *file, struct sockaddr_in *ap, struct rpc_timeout *to)
-{
-	struct rpc_xprt	*xprt;
-	struct socket	*sock;
-	int		proto;
-
-	if (!file) {
-		printk("RPC: file == NULL in xprt_create!\n");
-		return NULL;
-	}
-
-	sock = &file->f_inode->u.socket_i;
-	if (sock->ops->family != PF_INET) {
-		printk(KERN_WARNING "RPC: only INET sockets supported\n");
-		return NULL;
-	}
-
-	proto = (sock->type == SOCK_DGRAM)? IPPROTO_UDP : IPPROTO_TCP;
-	if ((xprt = xprt_setup(sock, proto, ap, to)) != NULL) {
-		xprt->file = file;
-		file->f_count++;
-	}
-
-	return xprt;
-}
-#endif
-
-/*
  * Bind to a reserved port
  */
 static inline int
@@ -1473,6 +1509,7 @@
 			   (proto == IPPROTO_UDP)? "udp" : "tcp", proto);
 
 	type = (proto == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
+
 	if ((err = sock_create(PF_INET, type, proto, &sock)) < 0) {
 		printk("RPC: can't create socket (%d).\n", -err);
 		goto failed;
@@ -1529,6 +1566,21 @@
 	rpc_wake_up(&xprt->pending);
 	rpc_wake_up(&xprt->backlog);
 	rpc_wake_up(&xprt->reconn);
+	wake_up(&xprt->cong_wait);
+}
+
+/*
+ * Clear the xprt backlog queue
+ */
+int
+xprt_clear_backlog(struct rpc_xprt *xprt) {
+	if (!xprt)
+		return 0;
+	if (RPCXPRT_CONGESTED(xprt))
+		return 0;
+	rpc_wake_up_next(&xprt->backlog);
+	wake_up(&xprt->cong_wait);
+	return 1;
 }
 
 /*
@@ -1537,18 +1589,6 @@
 int
 xprt_destroy(struct rpc_xprt *xprt)
 {
-#ifndef SOCK_HAS_USER_DATA
-	struct rpc_xprt	**q;
-
-	for (q = &sock_list; *q && *q != xprt; q = &((*q)->link))
-		;
-	if (!*q) {
-		printk(KERN_WARNING "xprt_destroy: unknown socket!\n");
-		return -EIO;	/* why is there no EBUGGYSOFTWARE */
-	}
-	*q = xprt->link;
-#endif
-
 	dprintk("RPC:      destroying transport %p\n", xprt);
 	xprt_close(xprt);
 	kfree(xprt);
