diff -rupBb linux-2.4.24.orig/fs/buffer.c linux-2.4.24.new/fs/buffer.c
--- linux-2.4.24.orig/fs/buffer.c	Tue Mar 16 13:43:29 2004
+++ linux-2.4.24.new/fs/buffer.c	Tue Mar 16 13:17:18 2004
@@ -623,6 +623,20 @@ void buffer_insert_list(struct buffer_he
 	spin_unlock(&lru_list_lock);
 }
 
+void buffer_insert_list_journal_head(struct buffer_head *bh, 
+                                     struct list_head *list,
+				     void *journal_head)
+{
+	spin_lock(&lru_list_lock);
+	if (buffer_attached(bh))
+		list_del(&bh->b_inode_buffers);
+	set_buffer_attached(bh);
+	list_add(&bh->b_inode_buffers, list);
+	bh->b_journal_head = journal_head;
+	spin_unlock(&lru_list_lock);
+}
+EXPORT_SYMBOL(buffer_insert_list_journal_head);
+
 /*
  * The caller must have the lru_list lock before calling the 
  * remove_inode_queue functions.
@@ -1062,6 +1076,7 @@ inline void __mark_dirty(struct buffer_h
 	bh->b_flushtime = jiffies + bdf_prm.b_un.age_buffer;
 	refile_buffer(bh);
 }
+EXPORT_SYMBOL(__mark_dirty);
 
 /* atomic version, the user must call balance_dirty() by hand
    as soon as it become possible to block */
@@ -1335,7 +1350,7 @@ no_grow:
 /*
  * Called when truncating a buffer on a page completely.
  */
-static void discard_buffer(struct buffer_head * bh)
+void discard_buffer(struct buffer_head * bh)
 {
 	if (buffer_mapped(bh)) {
 		mark_buffer_clean(bh);
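
The fs/buffer.c hunks above add one new helper and export two existing symbols for reiserfs: buffer_insert_list_journal_head() moves a buffer onto a per-transaction list (detaching it from whatever list held it before) and records the owning journal list in b_journal_head, while __mark_dirty and discard_buffer become visible to the reiserfs data-logging code. As a rough illustration, here is a minimal userspace model of the attach operation; all names in it are hypothetical stand-ins, and only the lock/detach/attach/record-owner sequence mirrors the patch.

/*
 * Minimal userspace model of buffer_insert_list_journal_head().
 * Hypothetical names throughout; not the kernel API.
 */
#include <pthread.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void list_init(struct list_head *h) { h->next = h->prev = h; }

static void list_del(struct list_head *e)
{
	e->prev->next = e->next;
	e->next->prev = e->prev;
}

static void list_add(struct list_head *e, struct list_head *h)
{
	e->next = h->next;
	e->prev = h;
	h->next->prev = e;
	h->next = e;
}

struct model_buffer {
	struct list_head b_inode_buffers; /* link into whichever list owns us */
	int b_attached;                   /* models the buffer_attached() bit */
	void *b_journal_head;             /* journal list that ordered us */
};

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;

/* detach from any old list, attach to `list`, remember the owner */
static void insert_journal_head(struct model_buffer *bh,
				struct list_head *list, void *jh)
{
	pthread_mutex_lock(&lru_lock);
	if (bh->b_attached)
		list_del(&bh->b_inode_buffers);
	bh->b_attached = 1;
	list_add(&bh->b_inode_buffers, list);
	bh->b_journal_head = jh;
	pthread_mutex_unlock(&lru_lock);
}

int main(void)
{
	struct list_head ordered;
	struct model_buffer bh = { .b_attached = 0 };

	list_init(&ordered);
	insert_journal_head(&bh, &ordered, (void *)0x1);
	insert_journal_head(&bh, &ordered, (void *)0x2); /* re-attach is safe */
	printf("attached=%d jh=%p\n", bh.b_attached, bh.b_journal_head);
	return 0;
}
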
diff -rupBb linux-2.4.24.orig/fs/reiserfs/Makefile linux-2.4.24.new/fs/reiserfs/Makefile
--- linux-2.4.24.orig/fs/reiserfs/Makefile	Tue Mar 16 13:43:29 2004
+++ linux-2.4.24.new/fs/reiserfs/Makefile	Tue Mar 16 13:17:18 2004
@@ -7,6 +7,7 @@
 #
 # Note 2! The CFLAGS definitions are now in the main makefile...
 
+export-objs := super.o
 O_TARGET := reiserfs.o
 obj-y   := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o super.o prints.o objectid.o \
 lbalance.o ibalance.o stree.o hashes.o buffer2.o tail_conversion.o journal.o resize.o item_ops.o ioctl.o procfs.o
diff -rupBb linux-2.4.24.orig/fs/reiserfs/do_balan.c linux-2.4.24.new/fs/reiserfs/do_balan.c
--- linux-2.4.24.orig/fs/reiserfs/do_balan.c	Tue Mar 16 13:43:29 2004
+++ linux-2.4.24.new/fs/reiserfs/do_balan.c	Tue Mar 16 13:43:45 2004
@@ -33,16 +33,8 @@ struct tree_balance * cur_tb = NULL; /* 
 inline void do_balance_mark_leaf_dirty (struct tree_balance * tb, 
 					struct buffer_head * bh, int flag)
 {
-    if (reiserfs_dont_log(tb->tb_sb)) {
-	if (!test_and_set_bit(BH_Dirty, &bh->b_state)) {
-	    __mark_buffer_dirty(bh) ;
-	    tb->need_balance_dirty = 1;
-	}
-    } else {
-	int windex = push_journal_writer("do_balance") ;
-	journal_mark_dirty(tb->transaction_handle, tb->transaction_handle->t_super, bh) ;
-	pop_journal_writer(windex) ;
-    }
+    journal_mark_dirty(tb->transaction_handle, 
+                       tb->transaction_handle->t_super, bh) ;
 }
 
 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
@@ -1259,7 +1251,9 @@ void reiserfs_invalidate_buffer (struct 
     set_blkh_level( blkh, FREE_LEVEL );
     set_blkh_nr_item( blkh, 0 );
     
-    mark_buffer_clean (bh);
+    if (buffer_dirty(bh))
+        BUG();
+    // mark_buffer_clean (bh);
     /* reiserfs_free_block is no longer schedule safe 
     reiserfs_free_block (tb->transaction_handle, tb->tb_sb, bh->b_blocknr);
     */
@@ -1575,6 +1568,7 @@ void do_balance (struct tree_balance * t
     tb->tb_mode = flag;
     tb->need_balance_dirty = 0;
 
+    reiserfs_check_lock_depth("do balance");
     if (FILESYSTEM_CHANGED_TB(tb)) {
         reiserfs_panic(tb->tb_sb, "clm-6000: do_balance, fs generation has changed\n") ;
     }
@@ -1605,5 +1599,6 @@ void do_balance (struct tree_balance * t
 
 
     do_balance_completed (tb);
+    reiserfs_check_lock_depth("do balance2");
 
 }
diff -rupBb linux-2.4.24.orig/fs/reiserfs/file.c linux-2.4.24.new/fs/reiserfs/file.c
--- linux-2.4.24.orig/fs/reiserfs/file.c	Tue Mar 16 13:43:29 2004
+++ linux-2.4.24.new/fs/reiserfs/file.c	Tue Mar 16 13:43:45 2004
@@ -42,7 +42,6 @@ static int reiserfs_file_release (struct
     lock_kernel() ;
     down (&inode->i_sem); 
     journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3) ;
-    reiserfs_update_inode_transaction(inode) ;
 
 #ifdef REISERFS_PREALLOCATE
     reiserfs_discard_prealloc (&th, inode);
@@ -101,8 +100,17 @@ static int reiserfs_setattr(struct dentr
 	    attr->ia_size > MAX_NON_LFS)
             return -EFBIG ;
 
+        /* During a truncate, we have to make sure the new i_size is in
+	** the transaction before we start dropping updates to data logged
+	** or ordered write data pages.
+	*/
+	if (attr->ia_size < inode->i_size && reiserfs_file_data_log(inode)) {
+	    struct reiserfs_transaction_handle th ;
+	    journal_begin(&th, inode->i_sb, 1) ;
+	    reiserfs_update_sd_size(&th, inode, attr->ia_size) ;
+	    journal_end(&th, inode->i_sb, 1) ;
 	/* fill in hole pointers in the expanding truncate case. */
-        if (attr->ia_size > inode->i_size) {
+        } else if (attr->ia_size > inode->i_size) {
 	    error = generic_cont_expand(inode, attr->ia_size) ;
 	    if (inode->u.reiserfs_i.i_prealloc_count > 0) {
 		struct reiserfs_transaction_handle th ;
@@ -129,9 +137,24 @@ static int reiserfs_setattr(struct dentr
     return error ;
 }
 
+static ssize_t
+reiserfs_file_write(struct file *f, const char *b, size_t count, loff_t *ppos)
+{
+    ssize_t ret;
+    struct inode *inode = f->f_dentry->d_inode;
+
+    ret = generic_file_write(f, b, count, ppos);
+    if (ret >= 0 && f->f_flags & O_SYNC) {
+        lock_kernel();
+	reiserfs_commit_for_inode(inode);
+	unlock_kernel();
+    }
+    return ret;
+}
+
 struct file_operations reiserfs_file_operations = {
     read:	generic_file_read,
-    write:	generic_file_write,
+    write:	reiserfs_file_write,
     ioctl:	reiserfs_ioctl,
     mmap:	generic_file_mmap,
     release:	reiserfs_file_release,
diff -rupBb linux-2.4.24.orig/fs/reiserfs/fix_node.c linux-2.4.24.new/fs/reiserfs/fix_node.c
--- linux-2.4.24.orig/fs/reiserfs/fix_node.c	Tue Mar 16 13:43:29 2004
+++ linux-2.4.24.new/fs/reiserfs/fix_node.c	Tue Mar 16 13:43:45 2004
@@ -2121,7 +2121,8 @@ static void tb_buffer_sanity_check (stru
 
 static void clear_all_dirty_bits(struct super_block *s, 
                                  struct buffer_head *bh) {
-  reiserfs_prepare_for_journal(s, bh, 0) ;
+  // reiserfs_prepare_for_journal(s, bh, 0) ;
+  set_bit(BH_JPrepared, &bh->b_state) ;
 }
 
 static int wait_tb_buffers_until_unlocked (struct tree_balance * p_s_tb)
diff -rupBb linux-2.4.24.orig/fs/reiserfs/inode.c linux-2.4.24.new/fs/reiserfs/inode.c
--- linux-2.4.24.orig/fs/reiserfs/inode.c	Tue Mar 16 13:43:29 2004
+++ linux-2.4.24.new/fs/reiserfs/inode.c	Tue Mar 16 13:43:45 2004
@@ -17,6 +17,8 @@
 #define GET_BLOCK_READ_DIRECT 4  /* read the tail if indirect item not found */
 #define GET_BLOCK_NO_ISEM     8 /* i_sem is not held, don't preallocate */
 
+static int reiserfs_commit_write(struct file *, struct page *, 
+                                 unsigned from, unsigned to) ;
 static int reiserfs_get_block (struct inode * inode, long block,
 			       struct buffer_head * bh_result, int create);
 
@@ -106,9 +108,13 @@ inline void make_le_item_head (struct it
 }
 
 static void add_to_flushlist(struct inode *inode, struct buffer_head *bh) {
-    struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb) ;
+    struct reiserfs_journal_list *jl = SB_JOURNAL(inode->i_sb)->j_current_jl;
+    buffer_insert_list_journal_head(bh, &jl->j_ordered_bh_list, jl);
+}
 
-    buffer_insert_list(bh, &j->j_dirty_buffers) ;
+static void add_to_tail_list(struct inode *inode, struct buffer_head *bh) {
+    struct reiserfs_journal_list *jl = SB_JOURNAL(inode->i_sb)->j_current_jl;
+    buffer_insert_list_journal_head(bh, &jl->j_tail_bh_list, jl);
 }
 
 //
@@ -201,15 +207,16 @@ static int file_capable (struct inode * 
     return 0;
 }
 
-/*static*/ void restart_transaction(struct reiserfs_transaction_handle *th,
-				struct inode *inode, struct path *path) {
-  struct super_block *s = th->t_super ;
-  int len = th->t_blocks_allocated ;
-
+static void restart_transaction(struct reiserfs_transaction_handle *th,
+				struct inode *inode, struct path *path,
+				int jbegin_count) {
+  /* we cannot restart while nested unless the parent allows it */
+  if (!reiserfs_restartable_handle(th) && th->t_refcount > 1) {
+      return  ;
+  }
   pathrelse(path) ;
   reiserfs_update_sd(th, inode) ;
-  journal_end(th, s, len) ;
-  journal_begin(th, s, len) ;
+  reiserfs_restart_transaction(th, jbegin_count) ;
   reiserfs_update_inode_transaction(inode) ;
 }
 
@@ -327,6 +334,10 @@ research:
 	}
     }
     p += offset ;
+    if ((offset + inode->i_sb->s_blocksize) > PAGE_CACHE_SIZE) {
+	printk("get_block_create_0 offset %lu too large\n", offset);
+    }
+
     memset (p, 0, inode->i_sb->s_blocksize);
     do {
 	if (!is_direct_le_ih (ih)) {
@@ -421,10 +432,32 @@ static int reiserfs_get_block_create_0 (
 static int reiserfs_get_block_direct_io (struct inode * inode, long block,
 			struct buffer_head * bh_result, int create) {
     int ret ;
-
+    struct reiserfs_transaction_handle *th;
+    int refcount = 0;
+    struct super_block *s = inode->i_sb;
+
+    /* get_block might start a new transaction and leave it running.
+     * test for that by checking for a transaction running right now
+     * and recording its refcount.  Run a journal_end if the refcount
+     * after reiserfs_get_block is higher than it was before.
+     */
+    if (reiserfs_transaction_running(s)) {
+	th = current->journal_info;
+	refcount = th->t_refcount;
+    }
     bh_result->b_page = NULL;
     ret = reiserfs_get_block(inode, block, bh_result, create) ;
 
+    if (!ret && reiserfs_transaction_running(s)) {
+	th = current->journal_info;
+	if (th->t_refcount > refcount) {
+	    lock_kernel();
+	    reiserfs_update_sd(th, inode) ;
+	    journal_end(th, s, th->t_blocks_allocated);
+	    unlock_kernel();
+	}
+    }
+
     /* don't allow direct io onto tail pages */
     if (ret == 0 && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
 	/* make sure future calls to the direct io funcs for this offset
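
The comment at the top of reiserfs_get_block_direct_io() describes a bracketing pattern: sample the handle refcount before calling reiserfs_get_block(), then close the handle afterwards only if the refcount grew. Below is a self-contained userspace sketch of that pattern; handle_t, current_handle, and the helpers are hypothetical models, not the kernel API.

/*
 * Userspace sketch of the refcount bracketing described above.
 */
#include <stdio.h>

typedef struct handle { int refcount; } handle_t;

static handle_t *current_handle;	/* models current->journal_info */

/* models reiserfs_get_block(): may open (or nest into) a transaction */
static void maybe_start_nested(void)
{
	static handle_t h;

	if (!current_handle) {
		h.refcount = 1;
		current_handle = &h;
	} else {
		current_handle->refcount++;
	}
}

static void journal_end(handle_t *th)
{
	if (--th->refcount == 0)
		current_handle = NULL;
}

int main(void)
{
	int before = current_handle ? current_handle->refcount : 0;

	maybe_start_nested();
	/* close only what the callee opened, as the direct-io path does */
	if (current_handle && current_handle->refcount > before)
		journal_end(current_handle);
	printf("handle left running: %s\n", current_handle ? "yes" : "no");
	return 0;
}
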
@@ -459,7 +492,6 @@ static int convert_tail_for_hole(struct 
                                  struct buffer_head *bh_result,
 				 loff_t tail_offset) {
     unsigned long index ;
-    unsigned long tail_end ; 
     unsigned long tail_start ;
     struct page * tail_page ;
     struct page * hole_page = bh_result->b_page ;
@@ -470,7 +502,6 @@ static int convert_tail_for_hole(struct 
 
     /* always try to read until the end of the block */
     tail_start = tail_offset & (PAGE_CACHE_SIZE - 1) ;
-    tail_end = (tail_start | (bh_result->b_size - 1)) + 1 ;
 
     index = tail_offset >> PAGE_CACHE_SHIFT ;
     if ( !hole_page || index != hole_page->index) {
@@ -492,16 +523,13 @@ static int convert_tail_for_hole(struct 
     ** data that has been read directly into the page, and block_prepare_write
     ** won't trigger a get_block in this case.
     */
-    fix_tail_page_for_writing(tail_page) ;
-    retval = block_prepare_write(tail_page, tail_start, tail_end, 
-                                 reiserfs_get_block) ; 
+    retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_start) ;
     if (retval)
         goto unlock ;
 
     /* tail conversion might change the data in the page */
     flush_dcache_page(tail_page) ;
-
-    retval = generic_commit_write(NULL, tail_page, tail_start, tail_end) ;
+    retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_start) ;
 
 unlock:
     if (tail_page != hole_page) {
@@ -541,20 +569,34 @@ static int reiserfs_get_block (struct in
     int done;
     int fs_gen;
     int windex ;
-    struct reiserfs_transaction_handle th ;
+    struct reiserfs_transaction_handle *th = NULL ;
     /* space reserved in transaction batch: 
         . 3 balancings in direct->indirect conversion
         . 1 block involved into reiserfs_update_sd()
+	. 1 bitmap block
        XXX in practically impossible worst case direct2indirect()
-       can incur (much) more that 3 balancings. */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1;
+       can incur (much) more than 3 balancings, but we deal with
+       direct2indirect lower down */
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT + 2;
     int version;
-    int transaction_started = 0 ;
+    int dangle = 1;
     loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ;
+    int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits;
 
-				/* bad.... */
+    /* if this block might contain a tail, we need to be more conservative */
+    if (new_offset <= (loff_t)(16 * 1024)) {
+        jbegin_count += JOURNAL_PER_BALANCE_CNT * 2;
+    }
+    /* we might nest for the entire page, so we need to make sure
+     * to reserve enough to insert pointers in the tree for each block
+     * in the file
+     */
+    jbegin_count *= blocks_per_page;
+    if (reiserfs_file_data_log(inode)) {
+        jbegin_count += blocks_per_page;
+    }
     lock_kernel() ;
-    th.t_trans_id = 0 ;
     version = get_inode_item_key_version (inode);
 
     if (block < 0) {
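
The reservation math in the hunk above is easy to misread, so here is a standalone restatement with two worked cases. The JOURNAL_PER_BALANCE_CNT value in the sketch is illustrative only; the 16KiB tail cutoff and the order of operations are taken from the hunk.

/*
 * Standalone restatement of the jbegin_count computation above.
 */
#include <stdio.h>

#define JOURNAL_PER_BALANCE_CNT	18	/* illustrative, not the kernel's */
#define PAGE_CACHE_SIZE		4096

static int jbegin_count_for(long long new_offset, int blkbits, int data_log)
{
	int blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
	int count = JOURNAL_PER_BALANCE_CNT + 2;	/* balance + sd + bitmap */

	if (new_offset <= 16 * 1024)		/* block may hold a tail */
		count += JOURNAL_PER_BALANCE_CNT * 2;
	count *= blocks_per_page;		/* we may nest once per block */
	if (data_log)
		count += blocks_per_page;	/* the data blocks themselves */
	return count;
}

int main(void)
{
	/* 4K blocks, offset past any possible tail: (18 + 2) * 1 = 20 */
	printf("%d\n", jbegin_count_for(1 << 20, 12, 0));
	/* 1K blocks near the file start, data=journal:
	   ((18 + 2) + 36) * 4 + 4 = 228 */
	printf("%d\n", jbegin_count_for(8 * 1024, 10, 1));
	return 0;
}
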
@@ -579,6 +621,10 @@ static int reiserfs_get_block (struct in
 	return ret;
     }
 
+    /* don't leave the trans running if we are already nested */
+    if (reiserfs_transaction_running(inode->i_sb))
+	dangle = 0;
+
     /* If file is of such a size, that it might have a tail and tails are enabled
     ** we should mark it as possibly needing tail packing on close
     */
@@ -591,10 +637,18 @@ static int reiserfs_get_block (struct in
     /* set the key of the first byte in the 'block'-th block of file */
     make_cpu_key (&key, inode, new_offset,
 		  TYPE_ANY, 3/*key length*/);
+
+    /* reiserfs_commit_write will close any transaction currently
+    ** running.  So, if we are nesting into someone else, we have to
+    ** make sure and bump the refcount
+    */
     if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
-	journal_begin(&th, inode->i_sb, jbegin_count) ;
+	th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count) ;
+	if (IS_ERR(th)) {
+	    retval = PTR_ERR(th) ;
+	    goto failure ;
+	}
 	reiserfs_update_inode_transaction(inode) ;
-	transaction_started = 1 ;
     }
  research:
 
@@ -614,15 +668,18 @@ static int reiserfs_get_block (struct in
 
     if (allocation_needed (retval, allocated_block_nr, ih, item, pos_in_item)) {
 	/* we have to allocate block for the unformatted node */
-	if (!transaction_started) {
+	if (!reiserfs_active_handle(th)) {
 	    pathrelse(&path) ;
-	    journal_begin(&th, inode->i_sb, jbegin_count) ;
+	    th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count) ;
+	    if (IS_ERR(th)) {
+		retval = PTR_ERR(th) ;
+		goto failure ;
+	    }
 	    reiserfs_update_inode_transaction(inode) ;
-	    transaction_started = 1 ;
 	    goto research ;
 	}
 
-	repeat = _allocate_block(&th, block, inode, &allocated_block_nr, &path, create);
+	repeat = _allocate_block(th, block, inode, &allocated_block_nr, &path, create);
 
 	if (repeat == NO_DISK_SPACE) {
 	    /* restart the transaction to give the journal a chance to free
@@ -633,8 +690,8 @@ static int reiserfs_get_block (struct in
 	    ** and wait for it
 	    */
 	    SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
-	    restart_transaction(&th, inode, &path) ; 
-	    repeat = _allocate_block(&th, block, inode, &allocated_block_nr, NULL, create);
+	    restart_transaction(th, inode, &path, jbegin_count) ; 
+	    repeat = _allocate_block(th, block, inode, &allocated_block_nr, NULL, create);
 
 	    if (repeat != NO_DISK_SPACE) {
 		goto research ;
@@ -664,15 +721,13 @@ static int reiserfs_get_block (struct in
 	    bh_result->b_state |= (1UL << BH_New);
 	    put_block_num(item, pos_in_item, allocated_block_nr) ;
             unfm_ptr = allocated_block_nr;
-	    journal_mark_dirty (&th, inode->i_sb, bh);
+	    journal_mark_dirty (th, inode->i_sb, bh);
 	    inode->i_blocks += (inode->i_sb->s_blocksize / 512) ;
-	    reiserfs_update_sd(&th, inode) ;
+	    reiserfs_update_sd(th, inode) ;
 	}
 	set_block_dev_mapped(bh_result, unfm_ptr, inode);
 	pathrelse (&path);
 	pop_journal_writer(windex) ;
-	if (transaction_started)
-	    journal_end(&th, inode->i_sb, jbegin_count) ;
 
 	unlock_kernel() ;
 	 
@@ -680,18 +735,23 @@ static int reiserfs_get_block (struct in
 	** there is no need to make sure the inode is updated with this 
 	** transaction
 	*/
+	if (!dangle && reiserfs_active_handle(th))
+	    journal_end(th, inode->i_sb, jbegin_count) ;
 	return 0;
     }
 
-    if (!transaction_started) {
+    if (!reiserfs_active_handle(th)) {
 	/* if we don't pathrelse, we could vs-3050 on the buffer if
 	** someone is waiting for it (they can't finish until the buffer
-	** is released, we can start a new transaction until they finish)
+	** is released, we can't start a new transaction until they finish)
 	*/
 	pathrelse(&path) ;
-	journal_begin(&th, inode->i_sb, jbegin_count) ;
+	th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count) ;
+	if (IS_ERR(th)) {
+	    retval = PTR_ERR(th) ;
+	    goto failure ;
+	}
 	reiserfs_update_inode_transaction(inode) ;
-	transaction_started = 1 ;
 	goto research;
     }
 
@@ -720,9 +780,9 @@ static int reiserfs_get_block (struct in
 	    set_cpu_key_k_offset (&tmp_key, 1);
 	    PATH_LAST_POSITION(&path) ++;
 
-	    retval = reiserfs_insert_item (&th, &path, &tmp_key, &tmp_ih, (char *)&unp);
+	    retval = reiserfs_insert_item (th, &path, &tmp_key, &tmp_ih, (char *)&unp);
 	    if (retval) {
-		reiserfs_free_block (&th, allocated_block_nr);
+		reiserfs_free_block (th, allocated_block_nr);
 		goto failure; // retval == -ENOSPC or -EIO or -EEXIST
 	    }
 	    if (unp)
@@ -746,8 +806,13 @@ static int reiserfs_get_block (struct in
 		   node. FIXME: this should also get into page cache */
 
 		pathrelse(&path) ;
-		journal_end(&th, inode->i_sb, jbegin_count) ;
-		transaction_started = 0 ;
+		/* ugly, but we should only end the transaction if
+		** we aren't nested
+		*/
+		if (th->t_refcount == 1) {
+		    journal_end(th, inode->i_sb, jbegin_count) ;
+		    th = NULL ;
+		}
 
 		retval = convert_tail_for_hole(inode, bh_result, tail_offset) ;
 		if (retval) {
@@ -755,20 +820,27 @@ static int reiserfs_get_block (struct in
 			reiserfs_warning(inode->i_sb, "clm-6004: convert tail failed inode %lu, error %d\n", inode->i_ino, retval) ;
 		    if (allocated_block_nr) {
 			/* the bitmap, the super, and the stat data == 3 */
-			journal_begin(&th, inode->i_sb, 3) ;
-			reiserfs_free_block (&th, allocated_block_nr);
-			transaction_started = 1 ;
+			if (!reiserfs_active_handle(th)) {
+			    th = reiserfs_persistent_transaction(inode->i_sb,3);
+			}
+			if (!IS_ERR(th)) {
+			    reiserfs_free_block (th, allocated_block_nr);
+			}
+
 		    }
 		    goto failure ;
 		}
 		goto research ;
 	    }
-	    retval = direct2indirect (&th, inode, &path, unbh, tail_offset);
+	    retval = direct2indirect (th, inode, &path, unbh, tail_offset);
 	    if (retval) {
 		reiserfs_unmap_buffer(unbh);
-		reiserfs_free_block (&th, allocated_block_nr);
+		reiserfs_free_block (th, allocated_block_nr);
 		goto failure;
 	    }
+
+	    reiserfs_update_sd(th, inode) ;
+
 	    /* it is important the mark_buffer_uptodate is done after
 	    ** the direct2indirect.  The buffer might contain valid
 	    ** data newer than the data on disk (read by readpage, changed,
@@ -779,24 +851,25 @@ static int reiserfs_get_block (struct in
 	    */
 	    mark_buffer_uptodate (unbh, 1);
 
-	    /* unbh->b_page == NULL in case of DIRECT_IO request, this means
-	       buffer will disappear shortly, so it should not be added to
-	       any of our lists.
-	    */
-	    if ( unbh->b_page ) {
 		/* we've converted the tail, so we must 
-		** flush unbh before the transaction commits
+	    ** flush unbh before the transaction commits.
+	    ** unbh->b_page will be NULL for direct io requests, and
+	    ** in that case there's no data to log, dirty or order
 		*/
-		add_to_flushlist(inode, unbh) ;
-
+	    if ( unbh->b_page ) {
+		if (reiserfs_file_data_log(inode)) {
+		    reiserfs_prepare_for_journal(inode->i_sb, unbh, 1) ;
+		    journal_mark_dirty(th, inode->i_sb, unbh) ;
+		} else {
 		/* mark it dirty now to prevent commit_write from adding
 		 ** this buffer to the inode's dirty buffer list
 		 */
 		__mark_buffer_dirty(unbh) ;
+		    /* note, this covers the data=ordered case too */
+		    add_to_tail_list(inode, unbh) ;
+		}
 	    }
 
-	    //inode->i_blocks += inode->i_sb->s_blocksize / 512;
-	    //mark_tail_converted (inode);
 	} else {
 	    /* append indirect item with holes if needed, when appending
 	       pointer to 'block'-th block use block, which is already
@@ -844,13 +917,13 @@ static int reiserfs_get_block (struct in
 		   only have space for one block */
 		blocks_needed=max_to_insert?max_to_insert:1;
 	    }
-	    retval = reiserfs_paste_into_item (&th, &path, &tmp_key, (char *)un, UNFM_P_SIZE * blocks_needed);
+	    retval = reiserfs_paste_into_item (th, &path, &tmp_key, (char *)un, UNFM_P_SIZE * blocks_needed);
 
 	    if (blocks_needed != 1)
 		 kfree(un);
 
 	    if (retval) {
-		reiserfs_free_block (&th, allocated_block_nr);
+		reiserfs_free_block (th, allocated_block_nr);
 		goto failure;
 	    }
 	    if (done) {
@@ -874,9 +947,12 @@ static int reiserfs_get_block (struct in
 	**
 	** release the path so that anybody waiting on the path before
 	** ending their transaction will be able to continue.
+	**
+	** this only happens when inserting holes into the file, so it
+	** does not affect data=ordered safety at all
 	*/
-	if (journal_transaction_should_end(&th, th.t_blocks_allocated)) {
-	  restart_transaction(&th, inode, &path) ; 
+	if (journal_transaction_should_end(th, jbegin_count)) {
+	    restart_transaction(th, inode, &path, jbegin_count) ; 
 	}
 	/* inserting indirect pointers for a hole can take a 
 	** long time.  reschedule if needed
@@ -894,7 +970,7 @@ static int reiserfs_get_block (struct in
 			      "%K should not be found\n", &key);
 	    retval = -EEXIST;
 	    if (allocated_block_nr)
-	        reiserfs_free_block (&th, allocated_block_nr);
+	        reiserfs_free_block (th, allocated_block_nr);
 	    pathrelse(&path) ;
 	    goto failure;
 	}
@@ -906,16 +982,26 @@ static int reiserfs_get_block (struct in
 
 
     retval = 0;
-    reiserfs_check_path(&path) ;
 
  failure:
-    if (transaction_started) {
-      reiserfs_update_sd(&th, inode) ;
-      journal_end(&th, inode->i_sb, jbegin_count) ;
+    pathrelse(&path) ;
+    /* if we had an error, end the transaction */
+    if (!IS_ERR(th) && reiserfs_active_handle(th)) {
+        if (retval != 0) {
+	    reiserfs_update_sd(th, inode) ;
+	    journal_end(th, inode->i_sb, jbegin_count) ;
+	    th = NULL ;
+	} else if (!dangle) {
+	    journal_end(th, inode->i_sb, jbegin_count) ;
+	    th = NULL ;
+	}
     }
     pop_journal_writer(windex) ;
+    if (retval == 0 && reiserfs_active_handle(th) && 
+        current->journal_info != th) {
+        BUG() ;
+    }
     unlock_kernel() ;
-    reiserfs_check_path(&path) ;
     return retval;
 }
 
@@ -1030,7 +1116,7 @@ static void init_inode (struct inode * i
 
 
 // update new stat data with inode fields
-static void inode2sd (void * sd, struct inode * inode)
+static void inode2sd (void * sd, struct inode * inode, loff_t new_size)
 {
     struct stat_data * sd_v2 = (struct stat_data *)sd;
     __u16 flags;
@@ -1038,7 +1124,7 @@ static void inode2sd (void * sd, struct 
     set_sd_v2_mode(sd_v2, inode->i_mode );
     set_sd_v2_nlink(sd_v2, inode->i_nlink );
     set_sd_v2_uid(sd_v2, inode->i_uid );
-    set_sd_v2_size(sd_v2, inode->i_size );
+    set_sd_v2_size(sd_v2, new_size);
     set_sd_v2_gid(sd_v2, inode->i_gid );
     set_sd_v2_mtime(sd_v2, inode->i_mtime );
     set_sd_v2_atime(sd_v2, inode->i_atime );
@@ -1055,7 +1141,7 @@ static void inode2sd (void * sd, struct 
 
 
 // used to copy inode's fields to old stat data
-static void inode2sd_v1 (void * sd, struct inode * inode)
+static void inode2sd_v1 (void * sd, struct inode * inode, loff_t new_size)
 {
     struct stat_data_v1 * sd_v1 = (struct stat_data_v1 *)sd;
 
@@ -1063,7 +1149,7 @@ static void inode2sd_v1 (void * sd, stru
     set_sd_v1_uid(sd_v1, inode->i_uid );
     set_sd_v1_gid(sd_v1, inode->i_gid );
     set_sd_v1_nlink(sd_v1, inode->i_nlink );
-    set_sd_v1_size(sd_v1, inode->i_size );
+    set_sd_v1_size(sd_v1, new_size);
     set_sd_v1_atime(sd_v1, inode->i_atime );
     set_sd_v1_ctime(sd_v1, inode->i_ctime );
     set_sd_v1_mtime(sd_v1, inode->i_mtime );
@@ -1081,7 +1167,8 @@ static void inode2sd_v1 (void * sd, stru
 /* NOTE, you must prepare the buffer head before sending it here,
 ** and then log it after the call
 */
-static void update_stat_data (struct path * path, struct inode * inode)
+static void update_stat_data (struct path * path, struct inode * inode,
+                              loff_t new_size)
 {
     struct buffer_head * bh;
     struct item_head * ih;
@@ -1095,17 +1182,16 @@ static void update_stat_data (struct pat
   
     if (stat_data_v1 (ih)) {
 	// path points to old stat data
-	inode2sd_v1 (B_I_PITEM (bh, ih), inode);
+	inode2sd_v1 (B_I_PITEM (bh, ih), inode, new_size);
     } else {
-	inode2sd (B_I_PITEM (bh, ih), inode);
+	inode2sd (B_I_PITEM (bh, ih), inode, new_size);
     }
 
     return;
 }
 
-
-void reiserfs_update_sd (struct reiserfs_transaction_handle *th, 
-			 struct inode * inode)
+void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th, 
+                             struct inode *inode, loff_t new_size)
 {
     struct cpu_key key;
     INITIALIZE_PATH(path);
@@ -1155,7 +1241,7 @@ void reiserfs_update_sd (struct reiserfs
 	}
 	break;
     }
-    update_stat_data (&path, inode);
+    update_stat_data (&path, inode, new_size);
     journal_mark_dirty(th, th->t_super, bh) ; 
     pathrelse (&path);
     return;
@@ -1240,6 +1326,7 @@ void reiserfs_read_inode2 (struct inode 
 	    reiserfs_make_bad_inode( inode );
     }
 
+    reiserfs_update_inode_transaction(inode);
     reiserfs_check_path(&path_to_sd) ; /* init inode should be relsing */
 
 }
@@ -1419,8 +1506,6 @@ int reiserfs_dentry_to_fh(struct dentry 
 ** does something when called for a synchronous update.
 */
 void reiserfs_write_inode (struct inode * inode, int do_sync) {
-    struct reiserfs_transaction_handle th ;
-    int jbegin_count = 1 ;
 
     if (inode->i_sb->s_flags & MS_RDONLY) {
         reiserfs_warning(inode->i_sb, "clm-6005: writing inode %lu on readonly FS\n", 
@@ -1434,9 +1519,7 @@ void reiserfs_write_inode (struct inode 
     */
     if (do_sync && !(current->flags & PF_MEMALLOC)) {
 	lock_kernel() ;
-	journal_begin(&th, inode->i_sb, jbegin_count) ;
-	reiserfs_update_sd (&th, inode);
-	journal_end_sync(&th, inode->i_sb, jbegin_count) ;
+ 	reiserfs_commit_for_inode(inode) ;
 	unlock_kernel() ;
     }
 }
@@ -1642,9 +1725,9 @@ int reiserfs_new_inode (struct reiserfs_
 	    err = -EINVAL;
 	    goto out_bad_inode;
 	}
-	inode2sd_v1 (&sd, inode);
+	inode2sd_v1 (&sd, inode, inode->i_size);
     } else
-	inode2sd (&sd, inode);
+	inode2sd (&sd, inode, inode->i_size);
 
     // these do not go to on-disk stat data
     inode->i_ino = le32_to_cpu (ih.ih_key.k_objectid);
@@ -1820,6 +1903,7 @@ void reiserfs_truncate_file(struct inode
     unsigned length ;
     struct page *page = NULL ;
     int error ;
+    int need_balance_dirty = 0 ;
     struct buffer_head *bh = NULL ;
 
     if (p_s_inode->i_size > 0) {
@@ -1852,34 +1936,58 @@ void reiserfs_truncate_file(struct inode
 	       transaction of truncating gets committed - on reboot the file
 	       either appears truncated properly or not truncated at all */
 	add_save_link (&th, p_s_inode, 1);
+    if (page)
+	kmap(page);
     reiserfs_do_truncate (&th, p_s_inode, page, update_timestamps) ;
     pop_journal_writer(windex) ;
-    journal_end(&th, p_s_inode->i_sb,  JOURNAL_PER_BALANCE_CNT * 2 + 1 ) ;
-
-    if (update_timestamps)
-	remove_save_link (p_s_inode, 1/* truncate */);
 
     if (page) {
+	if (!PageLocked(page))
+	    BUG();
         length = offset & (blocksize - 1) ;
 	/* if we are not on a block boundary */
 	if (length) {
 	    length = blocksize - length ;
-	    memset((char *)kmap(page) + offset, 0, length) ;   
+	    if ((offset + length) > PAGE_CACHE_SIZE) {
+		BUG();
+	    }
+	    memset((char *)page_address(page) + offset, 0, length) ;   
 	    flush_dcache_page(page) ;
-	    kunmap(page) ;
 	    if (buffer_mapped(bh) && bh->b_blocknr != 0) {
+		if (reiserfs_file_data_log(p_s_inode)) {
+		    reiserfs_prepare_for_journal(p_s_inode->i_sb, bh, 1) ;
+		    journal_mark_dirty(&th, p_s_inode->i_sb, bh) ;
+		} else {
+		    /* it is safe to block here, but it would be faster
+		    ** to balance dirty after the journal lock is dropped
+		    */
 	        if (!atomic_set_buffer_dirty(bh)) {
 			set_buffer_flushtime(bh);
 			refile_buffer(bh);
 			buffer_insert_inode_data_queue(bh, p_s_inode);
-			balance_dirty();
+			need_balance_dirty = 1;
+
+			if (reiserfs_data_ordered(p_s_inode->i_sb)) {
+			    add_to_flushlist(p_s_inode, bh) ;
+			}
 		}
 	    }
 	}
+	}
+	kunmap(page);
+    }
+    journal_end(&th, p_s_inode->i_sb,  JOURNAL_PER_BALANCE_CNT * 2 + 1) ;
+
+    if (update_timestamps)
+	remove_save_link(p_s_inode, 1/* truncate */);
+
+    if (page) {
 	UnlockPage(page) ;
 	page_cache_release(page) ;
     }
-
+    if (need_balance_dirty) {
+	balance_dirty() ;
+    }
     return ;
 }
 
@@ -1948,6 +2056,8 @@ research:
 	    goto research;
 	}
 
+	if (((B_I_PITEM(bh, ih) - bh->b_data) + pos_in_item + copy_size) > inode->i_sb->s_blocksize)
+	    BUG();
 	memcpy( B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, copy_size) ;
 
 	journal_mark_dirty(&th, inode->i_sb, bh) ;
@@ -1975,9 +2085,37 @@ out:
 
     /* this is where we fill in holes in the file. */
     if (use_get_block) {
+	int old_refcount = 0 ;
+	struct reiserfs_transaction_handle *hole_th ;
+	if (reiserfs_transaction_running(inode->i_sb)) {
+	    hole_th = current->journal_info ;
+	    old_refcount = hole_th->t_refcount ;
+	}
 	retval = reiserfs_get_block(inode, block, bh_result, 
 	                            GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM) ;
 	if (!retval) {
+	    /* did reiserfs_get_block leave us a running transaction? */
+	    if (reiserfs_transaction_running(inode->i_sb)) {
+		hole_th = current->journal_info ;
+		if (old_refcount < hole_th->t_refcount) {
+		    lock_kernel() ;
+		    /* we've filled a hole, make sure the new block
+		     * gets to disk before transaction commit
+		     */
+		    if (buffer_mapped(bh_result) && bh_result->b_blocknr != 0 &&
+		        reiserfs_data_ordered(inode->i_sb))
+		    {
+			__mark_buffer_dirty(bh_result) ;
+			mark_buffer_uptodate(bh_result, 1);
+			/* no need to update the inode trans, already done */
+			add_to_flushlist(inode, bh_result) ;
+		    }
+		    reiserfs_update_sd(hole_th, inode) ;
+		    journal_end(hole_th, hole_th->t_super, 
+		                hole_th->t_blocks_allocated) ;
+		    unlock_kernel() ;
+		}
+	    }
 	    if (!buffer_mapped(bh_result) || bh_result->b_blocknr == 0) {
 	        /* get_block failed to find a mapped unformatted node. */
 		use_get_block = 0 ;
@@ -1992,33 +2130,41 @@ out:
 /* helper func to get a buffer head ready for writepage to send to
 ** ll_rw_block
 */
-static inline void submit_bh_for_writepage(struct buffer_head **bhp, int nr) {
+static void submit_bh_for_writepage(struct page *page, 
+                                    struct buffer_head **bhp, int nr) {
     struct buffer_head *bh ;
     int i;
 
-    /* lock them all first so the end_io handler doesn't unlock the page
-    ** too early
+    /* lock them all first so the end_io handler doesn't
+    ** unlock too early
+    **
+    ** There's just no safe way to log the buffers during writepage,
+    ** we'll deadlock if kswapd tries to start a transaction.
+    **
+    ** There's also no useful way to tie them to a specific transaction,
+    ** so we just don't bother.
     */
     for(i = 0 ; i < nr ; i++) {
         bh = bhp[i] ;
-	lock_buffer(bh) ;
-	set_buffer_async_io(bh) ;
+	lock_buffer(bh);
+	set_buffer_async_io(bh);
+	set_bit(BH_Uptodate, &bh->b_state) ;
     }
     for(i = 0 ; i < nr ; i++) {
+	bh = bhp[i] ;
 	/* submit_bh doesn't care if the buffer is dirty, but nobody
 	** later on in the call chain will be cleaning it.  So, we
 	** clean the buffer here, it still gets written either way.
 	*/
-        bh = bhp[i] ;
 	clear_bit(BH_Dirty, &bh->b_state) ;
-	set_bit(BH_Uptodate, &bh->b_state) ;
 	submit_bh(WRITE, bh) ;
     }
 }
 
 static int reiserfs_write_full_page(struct page *page) {
     struct inode *inode = page->mapping->host ;
-    unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ;
+    loff_t size = inode->i_size;
+    unsigned long end_index = size >> PAGE_CACHE_SHIFT ;
     unsigned last_offset = PAGE_CACHE_SIZE;
     int error = 0;
     unsigned long block ;
@@ -2028,21 +2174,36 @@ static int reiserfs_write_full_page(stru
     struct buffer_head *arr[PAGE_CACHE_SIZE/512] ;
     int nr = 0 ;
 
+    if (reiserfs_transaction_running(inode->i_sb)) {
+        BUG();
+    }
+
+    if (!PageLocked(page))
+        BUG();
+
     if (!page->buffers) {
         block_prepare_write(page, 0, 0, NULL) ;
 	kunmap(page) ;
     }
+
+    if (reiserfs_transaction_running(inode->i_sb)) {
+        BUG();
+    }
     /* last page in the file, zero out any contents past the
     ** last byte in the file
     */
     if (page->index >= end_index) {
-        last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ;
+        char *p ;
+        last_offset = size & (PAGE_CACHE_SIZE - 1) ;
 	/* no file contents in this page */
 	if (page->index >= end_index + 1 || !last_offset) {
 	    error =  -EIO ;
 	    goto fail ;
 	}
-	memset((char *)kmap(page)+last_offset, 0, PAGE_CACHE_SIZE-last_offset) ;
+	p = kmap(page);
+	if (last_offset > PAGE_CACHE_SIZE)
+	    BUG();
+	memset(p + last_offset, 0, PAGE_CACHE_SIZE-last_offset) ;
 	flush_dcache_page(page) ;
 	kunmap(page) ;
     }
@@ -2083,7 +2244,7 @@ static int reiserfs_write_full_page(stru
     ** nr == 0 without there being any kind of error.
     */
     if (nr) {
-        submit_bh_for_writepage(arr, nr) ;
+        submit_bh_for_writepage(page, arr, nr) ;
 	wakeup_page_waiters(page);
     } else {
         UnlockPage(page) ;
@@ -2095,7 +2256,7 @@ static int reiserfs_write_full_page(stru
 
 fail:
     if (nr) {
-        submit_bh_for_writepage(arr, nr) ;
+        submit_bh_for_writepage(page, arr, nr) ;
     } else {
         UnlockPage(page) ;
     }
@@ -2120,10 +2281,46 @@ static int reiserfs_writepage (struct pa
 
 int reiserfs_prepare_write(struct file *f, struct page *page, 
 			   unsigned from, unsigned to) {
+    int cur_refcount = 0 ;
+    int ret ;
     struct inode *inode = page->mapping->host ;
+    struct reiserfs_transaction_handle *th ;
+
     reiserfs_wait_on_write_block(inode->i_sb) ;
     fix_tail_page_for_writing(page) ;
-    return block_prepare_write(page, from, to, reiserfs_get_block) ;
+
+    /* we look for a running transaction before the block_prepare_write
+    ** call, and then again afterwards.  This lets us know if
+    ** reiserfs_get_block added any additional transactions, so we can
+    ** let reiserfs_commit_write know if he needs to close them.
+    ** this is just nasty
+    */
+    if (reiserfs_transaction_running(inode->i_sb)) {
+	th = current->journal_info ;
+	cur_refcount = th->t_refcount ;
+    }
+    ret =  block_prepare_write(page, from, to, reiserfs_get_block) ;
+
+    /* it is very important that we only set the dangling bit when
+    ** there is no chance of additional nested transactions. 
+    */
+    if (reiserfs_transaction_running(inode->i_sb)) {
+        th = current->journal_info ;
+	if (th->t_refcount > cur_refcount) {
+	    /* if we return an error, commit_write isn't going to get called
+	     * we need to make sure we end any transactions 
+	     * reiserfs_get_block left hanging around
+	     */
+	    if (ret) {
+		lock_kernel();
+		journal_end(th, th->t_super, th->t_blocks_allocated) ;
+		unlock_kernel();
+	    } else {
+		reiserfs_set_handle_dangling(th) ;
+	    }
+	}
+    }
+    return ret ;
 }
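
reiserfs_prepare_write() above only sets the dangling bit; the matching pickup happens at the top of reiserfs_commit_write() further down in this patch. The userspace sketch below strings the two halves together so the whole handshake is visible in one place; every name in it is a hypothetical model of the kernel code.

/*
 * Userspace sketch of the dangling-handle handshake between
 * prepare_write and commit_write.  Hypothetical names throughout.
 */
#include <stdio.h>

typedef struct handle { int refcount; int dangling; } handle_t;

static handle_t *running;		/* models current->journal_info */

/* models reiserfs_get_block(): may leave a nested handle open */
static void get_block(void)
{
	static handle_t h;

	if (!running) {
		h.refcount = 1;
		h.dangling = 0;
		running = &h;
	} else {
		running->refcount++;
	}
}

static void journal_end(void)
{
	if (running && --running->refcount == 0)
		running = NULL;
}

static void prepare_write(void)
{
	int before = running ? running->refcount : 0;

	get_block();
	if (running && running->refcount > before)
		running->dangling = 1;	/* commit_write must close this */
}

static void commit_write(void)
{
	/* pick up the dangling handle first, before anything can nest */
	handle_t *dth = (running && running->dangling) ? running : NULL;

	if (dth)
		dth->dangling = 0;
	/* ... mark buffers dirty/ordered, update i_size ... */
	if (dth)
		journal_end();
}

int main(void)
{
	prepare_write();
	commit_write();
	printf("leaked handle: %s\n", running ? "yes" : "no");
	return 0;
}
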
 
 
@@ -2131,20 +2328,96 @@ static int reiserfs_aop_bmap(struct addr
   return generic_block_bmap(as, block, reiserfs_bmap) ;
 }
 
+/* taken from fs/buffer.c */
+static int __commit_write(struct reiserfs_transaction_handle *th,
+                          struct inode *inode, struct page *page,
+			  unsigned from, unsigned to, int *balance)
+{
+    unsigned block_start, block_end;
+    int partial = 0;
+    unsigned blocksize;
+    struct buffer_head *bh, *head;
+    int logbh = 0 ;
+
+    blocksize = 1 << inode->i_blkbits;
+    if (reiserfs_file_data_log(inode)) {
+        logbh = 1 ;
+	lock_kernel() ;
+	/* one for each block + the stat data, the caller closes the handle */
+	journal_begin(th, inode->i_sb,(PAGE_CACHE_SIZE >> inode->i_blkbits)+1);
+	reiserfs_update_inode_transaction(inode) ;
+	unlock_kernel() ;
+    }
+
+    for(bh = head = page->buffers, block_start = 0;
+        bh != head || !block_start;
+        block_start=block_end, bh = bh->b_this_page) {
+	block_end = block_start + blocksize;
+	if (block_end <= from || block_start >= to) {
+	    if (!buffer_uptodate(bh))
+		    partial = 1;
+	} else {
+	    set_bit(BH_Uptodate, &bh->b_state);
+	    if (logbh) {
+	        lock_kernel() ;
+		reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
+		journal_mark_dirty (th, inode->i_sb, bh);
+		unlock_kernel() ;
+	    } else if (!atomic_set_buffer_dirty(bh)) {
+		__mark_dirty(bh);
+		if (reiserfs_data_ordered(inode->i_sb)) {
+		    lock_kernel();
+		    add_to_flushlist(inode, bh);
+		    /* if we don't update the inode trans information,
+		     * an fsync(fd) might not catch these data blocks
+		     */
+		    reiserfs_update_inode_transaction(inode);
+		    unlock_kernel();
+		} else {
+		    buffer_insert_inode_data_queue(bh, inode);
+		}
+		*balance = 1;
+	    }
+	}
+    }
+
+    /*
+     * is this a partial write that happened to make all buffers
+     * uptodate then we can optimize away a bogus readpage() for
+     * the next read(). Here we 'discover' whether the page went
+     * uptodate as a result of this (potentially partial) write.
+     */
+    if (!partial)
+	SetPageUptodate(page);
+    return 0;
+}
+
 static int reiserfs_commit_write(struct file *f, struct page *page, 
                                  unsigned from, unsigned to) {
     struct inode *inode = page->mapping->host ;
     loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
     int ret ; 
+    int need_balance = 0;
+    struct reiserfs_transaction_handle th ;
+    struct reiserfs_transaction_handle *dth = NULL ;
     
+    /* we must do this before anything that might nest a transaction or
+    ** mess with the handle flags
+    */
+    if (reiserfs_transaction_running(inode->i_sb)) {
+	dth = current->journal_info ;
+	if (reiserfs_dangling_handle(dth)) {
+	    reiserfs_clear_handle_dangling(dth) ;
+	} else {
+	    dth = NULL ;
+	}
+    }
     reiserfs_wait_on_write_block(inode->i_sb) ;
  
-    /* generic_commit_write does this for us, but does not update the
-    ** transaction tracking stuff when the size changes.  So, we have
-    ** to do the i_size updates here.
-    */
+    th.t_flags = 0 ;
+    ret = __commit_write(&th, inode, page, from, to, &need_balance) ;
+ 
     if (pos > inode->i_size) {
-	struct reiserfs_transaction_handle th ;
 	lock_kernel();
 	/* If the file have grown beyond the border where it
 	   can have a tail, unmark it as needing a tail
@@ -2153,24 +2426,135 @@ static int reiserfs_commit_write(struct 
 	     (have_small_tails (inode->i_sb) && inode->i_size > block_size(inode)) )
 	    inode->u.reiserfs_i.i_flags &= ~i_pack_on_close_mask;
 
+	if (!reiserfs_active_handle(&th)) {
 	journal_begin(&th, inode->i_sb, 1) ;
+	}
 	reiserfs_update_inode_transaction(inode) ;
 	inode->i_size = pos ;
 	reiserfs_update_sd(&th, inode) ;
-	journal_end(&th, inode->i_sb, 1) ;
+	journal_end(&th, th.t_super, th.t_blocks_allocated) ;
+	unlock_kernel() ;
+    } else if (reiserfs_active_handle(&th)) {
+	/* in case commit_write left one running and the i_size update did
+	** not close it
+	*/
+	lock_kernel() ;
+	journal_end(&th, th.t_super, th.t_blocks_allocated) ;
+	unlock_kernel() ;
+    }
+
+    /* did reiserfs_get_block leave us with a running transaction?
+    */
+    if (dth) {
+	lock_kernel() ;
+	journal_end(dth, dth->t_super, dth->t_blocks_allocated) ;
 	unlock_kernel();
     }
  
-    ret = generic_commit_write(f, page, from, to) ;
+    kunmap(page) ;
 
-    /* we test for O_SYNC here so we can commit the transaction
-    ** for any packed tails the file might have had
-    */
-    if (f && (f->f_flags & O_SYNC)) {
+    if (need_balance)
+	balance_dirty();
+
+    return ret ;
+}
+
+/* decide if this buffer needs to stay around for data logging or ordered
+** write purposes
+*/
+static int flushpage_can_drop(struct inode *inode, struct buffer_head *bh) {
+    int ret = 1 ;
+    
+    if (!buffer_mapped(bh)) {
+        return 1 ;
+    }
+    if (reiserfs_file_data_log(inode)) {
 	lock_kernel() ;
- 	reiserfs_commit_for_inode(inode) ;
+	/* very conservative, leave the buffer pinned if anyone might need it.
+	** this should be changed to drop the buffer if it is only in the
+	** current transaction
+	*/
+        if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
+	    ret = 0 ;
+	}
+	unlock_kernel() ;
+    }
+    if (reiserfs_data_ordered(inode->i_sb)) {
+        if (buffer_dirty(bh) && bh->b_journal_head) {
+	    struct reiserfs_journal_list *jl = NULL;
+	    lock_kernel();
+
+	    /* we can race against fsync_inode_buffers if we aren't careful */
+	    if (buffer_attached(bh) && buffer_dirty(bh))
+		jl = bh->b_journal_head;
+
+	    /* why is this safe?
+	     * reiserfs_setattr updates i_size in the on disk
+	     * stat data before allowing vmtruncate to be called.
+	     *
+	     * If buffer was put onto the ordered list for this
+	     * transaction, we know for sure either this transaction
+	     * or an older one already has updated i_size on disk,
+	     * and this ordered data won't be referenced in the file
+	     * if we crash.
+	     *
+	     * if the buffer was put onto the ordered list for an older
+	     * transaction, we need to leave it around
+	     */
+	    if (jl != SB_JOURNAL(inode->i_sb)->j_current_jl) {
+	        ret = 0;
+	    } 
 	unlock_kernel();
     }
+    }
+    return ret ;
+}
+
+/* stolen from fs/buffer.c:discard_bh_page */
+static int reiserfs_flushpage(struct page *page, unsigned long offset) {
+    struct buffer_head *head, *bh, *next;
+    struct inode *inode = page->mapping->host ;
+    unsigned int curr_off = 0;
+    int ret = 1;
+
+    if (!PageLocked(page))
+	BUG();
+    if (!page->buffers)
+	return 1;
+
+    head = page->buffers;
+    bh = head;
+    do {
+	unsigned int next_off = curr_off + bh->b_size;
+	next = bh->b_this_page;
+
+	/* is this buffer to be completely truncated away? */
+	if (offset <= curr_off) {
+            if (flushpage_can_drop(inode, bh))
+		discard_buffer(bh);
+	    else
+	        ret = 0 ;
+	}
+	curr_off = next_off;
+	bh = next;
+    } while (bh != head);
+
+    /*
+     * subtle. We release buffer-heads only if this is
+     * the 'final' flushpage. We have invalidated the get_block
+     * cached value unconditionally, so real IO is not
+     * possible anymore.
+     *
+     * If the free doesn't work out, the buffers can be
+     * left around - they just turn into anonymous buffers
+     * instead.
+     */
+    if (!offset) {
+	if (!ret || !try_to_free_buffers(page, 0))
+	    return 0;
+        if (page->buffers)
+	    BUG();
+    }
     return ret ;
 }
 
@@ -2226,6 +2610,9 @@ static int reiserfs_direct_io(int rw, st
                               struct kiobuf *iobuf, unsigned long blocknr,
 			      int blocksize) 
 {
+    if (reiserfs_data_ordered(inode->i_sb) || reiserfs_file_data_log(inode)) {
+	return -EINVAL;
+    }
     lock_kernel();
     reiserfs_commit_for_tail(inode);
     unlock_kernel();
@@ -2241,4 +2628,5 @@ struct address_space_operations reiserfs
     commit_write: reiserfs_commit_write,
     bmap: reiserfs_aop_bmap,
     direct_IO: reiserfs_direct_io,
+    flushpage: reiserfs_flushpage,
 } ;
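
flushpage_can_drop() is the policy core of the new reiserfs_flushpage(); the loop around it is taken from discard_bh_page(). The standalone predicate below restates its decision table under simplified state bits, making the keep/drop cases explicit; struct model_bh and can_drop() are illustrative only.

/*
 * Standalone restatement of the flushpage_can_drop() decision table.
 */
#include <stdbool.h>
#include <stdio.h>

struct model_bh {
	bool mapped, dirty, journaled, journal_dirty, attached;
	void *journal_head;	/* journal list that ordered this buffer */
};

static bool can_drop(const struct model_bh *bh, bool data_log,
		     bool data_ordered, void *current_jl)
{
	if (!bh->mapped)
		return true;
	/* data=journal: keep anything the log might still need to replay */
	if (data_log && (bh->journaled || bh->journal_dirty))
		return false;
	/* data=ordered: only buffers ordered for the *current* transaction
	 * may go; an older transaction's commit still has to write them */
	if (data_ordered && bh->dirty && bh->journal_head) {
		void *jl = NULL;

		if (bh->attached && bh->dirty)
			jl = bh->journal_head;
		if (jl != current_jl)
			return false;
	}
	return true;
}

int main(void)
{
	struct model_bh bh = {
		.mapped = true, .dirty = true, .attached = true,
		.journal_head = (void *)0x1,
	};

	/* ordered for an older list: must be kept */
	printf("%d\n", can_drop(&bh, false, true, (void *)0x2));	/* 0 */
	/* ordered for the current list: safe to drop */
	printf("%d\n", can_drop(&bh, false, true, (void *)0x1));	/* 1 */
	return 0;
}
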
diff -rupBb linux-2.4.24.orig/fs/reiserfs/ioctl.c linux-2.4.24.new/fs/reiserfs/ioctl.c
--- linux-2.4.24.orig/fs/reiserfs/ioctl.c	Tue Mar 16 13:43:29 2004
+++ linux-2.4.24.new/fs/reiserfs/ioctl.c	Tue Mar 16 13:17:18 2004
@@ -25,9 +25,18 @@ int reiserfs_ioctl (struct inode * inode
 	switch (cmd) {
 	    case REISERFS_IOC_UNPACK:
 		if( S_ISREG( inode -> i_mode ) ) {
-		if (arg)
-		    return reiserfs_unpack (inode, filp);
-			else
+		    if (arg) {
+			int result; 
+			result = reiserfs_unpack (inode, filp);
+			if (reiserfs_file_data_log(inode)) {
+			    struct reiserfs_transaction_handle th;
+			    lock_kernel();
+			    journal_begin(&th, inode->i_sb, 1);
+			    SB_JOURNAL(inode->i_sb)->j_must_wait = 1;
+			    journal_end_sync(&th, inode->i_sb, 1);
+			    unlock_kernel();
+			}
+		    } else
 				return 0;
 		} else
 			return -ENOTTY;
@@ -97,6 +106,7 @@ int reiserfs_unpack (struct inode * inod
     int retval = 0;
     int index ;
     struct page *page ;
+    struct address_space *mapping ;
     unsigned long write_from ;
     unsigned long blocksize = inode->i_sb->s_blocksize ;
     	
@@ -127,19 +137,20 @@ int reiserfs_unpack (struct inode * inod
     ** reiserfs_get_block to unpack the tail for us.
     */
     index = inode->i_size >> PAGE_CACHE_SHIFT ;
-    page = grab_cache_page(inode->i_mapping, index) ;
+    mapping = inode->i_mapping ;
+    page = grab_cache_page(mapping, index) ;
     retval = -ENOMEM;
     if (!page) {
         goto out ;
     }
-    retval = reiserfs_prepare_write(NULL, page, write_from, blocksize) ;
+    retval = mapping->a_ops->prepare_write(NULL, page, write_from, write_from) ;
     if (retval)
         goto out_unlock ;
 
     /* conversion can change page contents, must flush */
     flush_dcache_page(page) ;
     inode->u.reiserfs_i.i_flags |= i_nopack_mask;
-    kunmap(page) ; /* mapped by prepare_write */
+    retval = mapping->a_ops->commit_write(NULL, page, write_from, write_from) ;
 
 out_unlock:
     UnlockPage(page) ;
diff -rupBb linux-2.4.24.orig/fs/reiserfs/journal.c linux-2.4.24.new/fs/reiserfs/journal.c
--- linux-2.4.24.orig/fs/reiserfs/journal.c	Tue Mar 16 13:43:29 2004
+++ linux-2.4.24.new/fs/reiserfs/journal.c	Tue Mar 16 13:44:45 2004
@@ -33,17 +33,16 @@
 **		     -- Note, if you call this as an immediate flush from 
 **		        from within kupdate, it will ignore the immediate flag
 **
-** The commit thread -- a writer process for async commits.  It allows a 
-**                      a process to request a log flush on a task queue.
-**                      the commit will happen once the commit thread wakes up.
-**                      The benefit here is the writer (with whatever
-**                      related locks it has) doesn't have to wait for the
-**                      log blocks to hit disk if it doesn't want to.
+** The commit thread -- a writer process for metadata and async commits.
+**			This allows us to do less io with the journal lock
+**			held.
 */
 
+#include <linux/module.h>
 #include <linux/config.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
+#include <linux/init.h>
 
 #include <linux/sched.h>
 #include <asm/semaphore.h>
@@ -59,6 +58,12 @@
 #include <linux/string.h>
 #include <linux/smp_lock.h>
 
+/* gets a struct reiserfs_journal_list * from a list head */
+#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
+                               j_list))
+#define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
+                               j_working_list))
+
 /* the number of mounted filesystems.  This is used to decide when to
 ** start and kill the commit thread
 */
@@ -66,12 +71,11 @@ static int reiserfs_mounted_fs_count = 0
 
 static struct list_head kreiserfsd_supers = LIST_HEAD_INIT(kreiserfsd_supers);
 
-/* wake this up when you add something to the commit thread task queue */
+/* wake this up when you want help from the commit thread */
 DECLARE_WAIT_QUEUE_HEAD(reiserfs_commit_thread_wait) ;
 
-/* wait on this if you need to be sure you task queue entries have been run */
+/* so we can wait for the commit thread to make progress */
 static DECLARE_WAIT_QUEUE_HEAD(reiserfs_commit_thread_done) ;
-DECLARE_TASK_QUEUE(reiserfs_commit_thread_tq) ;
 DECLARE_MUTEX(kreiserfsd_sem) ;
 
 #define JOURNAL_TRANS_HALF 1018   /* must be correct to keep the desc and commit
@@ -85,6 +89,9 @@ DECLARE_MUTEX(kreiserfsd_sem) ;
 
 #define BLOCK_NEEDS_FLUSH 4	/* used in flush_journal_list */
 
+/* journal list state bits */
+#define LIST_TOUCHED 1
+
 /* flags for do_journal_end */
 #define FLUSH_ALL   1		/* flush commit and real blocks */
 #define COMMIT_NOW  2		/* end and commit this transaction */
@@ -92,6 +99,9 @@ DECLARE_MUTEX(kreiserfsd_sem) ;
 
 /* state bits for the journal */
 #define WRITERS_BLOCKED 1      /* set when new writers not allowed */
+#define WRITERS_QUEUED 2       /* set when log is full due to too many 
+                                *  writers 
+				*/
 
 static int do_journal_end(struct reiserfs_transaction_handle *,struct super_block *,unsigned long nblocks,int flags) ;
 static int flush_journal_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) ;
@@ -110,7 +120,7 @@ static void init_journal_hash(struct sup
 ** make schedule happen after I've freed a block.  Look at remove_from_transaction and journal_mark_freed for
 ** more details.
 */
-static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) {
+static inline int reiserfs_clean_and_file_buffer(struct buffer_head *bh) {
   if (bh) {
     clear_bit(BH_Dirty, &bh->b_state) ;
     refile_buffer(bh) ;
@@ -476,6 +486,8 @@ int push_journal_writer(char *s) {
 int pop_journal_writer(int index) {
 #ifdef CONFIG_REISERFS_CHECK
   if (index >= 0) {
+    if (index >= 512)
+        BUG();
     journal_writers[index] = NULL ;
   }
 #endif
@@ -525,6 +537,12 @@ int reiserfs_in_journal(struct super_blo
     return 0 ;
   }
 
+  /* when data logging is on, no special action is needed for the data
+   * blocks
+   */
+  if (reiserfs_data_log(p_s_sb))
+      search_all = 0;
+
   PROC_INFO_INC( p_s_sb, journal.in_journal );
   /* If we aren't doing a search_all, this is a metablock, and it will be logged before use.
   ** if we crash before the transaction that freed it commits,  this transaction won't
@@ -552,6 +570,7 @@ int reiserfs_in_journal(struct super_blo
 
   /* is it in the current transaction.  This should never happen */
   if ((cn = get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_hash_table, dev,bl,size))) {
+    BUG();
     return 1; 
   }
 
@@ -600,6 +619,83 @@ static void cleanup_freed_for_journal_li
   jl->j_list_bitmap = NULL ;
 }
 
+static int journal_list_still_alive(struct super_block *s, 
+                                    unsigned long trans_id)
+{
+    struct list_head *entry = &SB_JOURNAL(s)->j_journal_list;
+    struct reiserfs_journal_list *jl;
+
+    if (!list_empty(entry)) {
+        jl = JOURNAL_LIST_ENTRY(entry->next);
+	if (jl->j_trans_id <= trans_id) {
+	    return 1;
+	}
+    }
+    return 0;
+}
+
+static int flush_older_commits(struct super_block *s, struct reiserfs_journal_list *jl) {
+    struct reiserfs_journal_list *other_jl;
+    struct reiserfs_journal_list *first_jl;
+    struct list_head *entry;
+    unsigned long trans_id = jl->j_trans_id;
+    unsigned long other_trans_id;
+    unsigned long first_trans_id;
+
+find_first:
+    /* 
+     * first we walk backwards to find the oldest uncommitted transaction
+     */
+    first_jl = jl;
+    entry = jl->j_list.prev;
+    while(1) {
+	other_jl = JOURNAL_LIST_ENTRY(entry);
+	if (entry == &SB_JOURNAL(s)->j_journal_list || 
+	    atomic_read(&other_jl->j_older_commits_done))
+	    break;
+        
+        first_jl = other_jl;
+	entry = other_jl->j_list.prev;
+    }
+
+    /* if we didn't find any older uncommitted transactions, return now */
+    if (first_jl == jl) {
+        return 0;
+    }
+
+    first_trans_id = first_jl->j_trans_id;
+
+    entry = &first_jl->j_list;
+    while(1) {
+	other_jl = JOURNAL_LIST_ENTRY(entry);
+	other_trans_id = other_jl->j_trans_id;
+	
+	if (other_trans_id < trans_id) { 
+	    if (atomic_read(&other_jl->j_commit_left) != 0) {
+		flush_commit_list(s, other_jl, 0);
+
+		/* list we were called with is gone, return */
+		if (!journal_list_still_alive(s, trans_id))
+		    return 1;
+
+		/* the one we just flushed is gone, this means all
+		 * older lists are also gone, so first_jl is no longer
+		 * valid either.  Go back to the beginning.
+		 */
+		if (!journal_list_still_alive(s, other_trans_id)) {
+		    goto find_first;
+		}
+	    }
+	    entry = entry->next;
+	    if (entry == &SB_JOURNAL(s)->j_journal_list)
+		return 0;
+	} else {
+	    return 0;
+	}
+    }
+    return 0;
+}
+
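
The subtle part of the new flush_older_commits() is its restart discipline: flush_commit_list() can delete journal lists, so saved pointers go stale, and the code re-validates by trans_id (journal_list_still_alive) and walks again from the start. A compact userspace model of that discipline, with the list of journal lists reduced to an array of trans_ids and all names illustrative:

/*
 * Userspace model of the restart discipline in flush_older_commits().
 */
#include <stdio.h>

#define NLISTS 8

static unsigned long lists[NLISTS];	/* trans_ids, oldest first */
static int nlists;

/* models journal_list_still_alive(): alive iff the oldest remaining
 * list is not newer than trans_id */
static int still_alive(unsigned long trans_id)
{
	return nlists > 0 && lists[0] <= trans_id;
}

/* models flush_commit_list() committing and deleting the oldest list */
static void flush_one(void)
{
	for (int i = 1; i < nlists; i++)
		lists[i - 1] = lists[i];
	nlists--;
}

static void flush_older_commits_model(unsigned long trans_id)
{
restart:
	if (!still_alive(trans_id))
		return;			/* our own list is gone; stop */
	if (lists[0] < trans_id) {
		flush_one();		/* may invalidate anything older */
		goto restart;		/* re-derive state from trans_ids */
	}
}

int main(void)
{
	nlists = 4;
	lists[0] = 10; lists[1] = 11; lists[2] = 12; lists[3] = 13;
	flush_older_commits_model(12);
	printf("oldest now %lu\n", lists[0]);	/* -> 12 */
	return 0;
}
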
 /*
 ** if this journal list still has commit blocks unflushed, send them to disk.
 **
@@ -609,16 +705,19 @@ static void cleanup_freed_for_journal_li
 */
 static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) {
   int i, count ;
-  int index = 0 ;
   int bn ;
   int retry_count = 0 ;
   int orig_commit_left = 0 ;
   struct buffer_head *tbh = NULL ;
-  struct reiserfs_journal_list *other_jl ;
+  unsigned long trans_id = jl->j_trans_id;
 
   reiserfs_check_lock_depth("flush_commit_list") ;
 
   if (atomic_read(&jl->j_older_commits_done)) {
+    if (!list_empty(&jl->j_ordered_bh_list))
+        BUG();
+    if (!list_empty(&jl->j_tail_bh_list))
+        BUG();
     return 0 ;
   }
 
@@ -626,50 +725,51 @@ static int flush_commit_list(struct supe
   ** us is on disk too
   */
   if (jl->j_len <= 0) {
+    BUG();
     return 0 ;
   }
+  if (trans_id == SB_JOURNAL(s)->j_trans_id)
+      BUG();
+
   if (flushall) {
-    /* we _must_ make sure the transactions are committed in order.  Start with the
-    ** index after this one, wrap all the way around 
-    */
-    index = (jl - SB_JOURNAL_LIST(s)) + 1 ;
-    for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) {
-      other_jl = SB_JOURNAL_LIST(s) + ( (index + i) % JOURNAL_LIST_COUNT) ;
-      if (other_jl && other_jl != jl && other_jl->j_len > 0 && other_jl->j_trans_id > 0 && 
-          other_jl->j_trans_id <= jl->j_trans_id && (atomic_read(&(jl->j_older_commits_done)) == 0)) {
-        flush_commit_list(s, other_jl, 0) ;
-      }
+    if (flush_older_commits(s, jl) == 1) {
+        /* list disappeared during flush_older_commits.  return */
+        return 0;
     }
   }
 
   count = 0 ;
-  /* don't flush the commit list for the current transactoin */
-  if (jl == ((SB_JOURNAL_LIST(s) + SB_JOURNAL_LIST_INDEX(s)))) {
-    return 0 ;
-  }
 
   /* make sure nobody is trying to flush this one at the same time */
-  if (atomic_read(&(jl->j_commit_flushing))) {
-    sleep_on(&(jl->j_commit_wait)) ;
-    if (flushall) {
-      atomic_set(&(jl->j_older_commits_done), 1) ;
-    }
-    return 0 ;
+  down(&jl->j_commit_lock);
+  if (!journal_list_still_alive(s, trans_id)) {
+      up(&jl->j_commit_lock);
+      return 0;
   }
+  if (jl->j_trans_id == 0)
+      BUG();
   
   /* this commit is done, exit */
   if (atomic_read(&(jl->j_commit_left)) <= 0) {
     if (flushall) {
       atomic_set(&(jl->j_older_commits_done), 1) ;
     }
+    if (!list_empty(&jl->j_ordered_bh_list))
+        BUG();
+    if (!list_empty(&jl->j_tail_bh_list))
+        BUG();
+    up(&jl->j_commit_lock);
     return 0 ;
   }
-  /* keeps others from flushing while we are flushing */
-  atomic_set(&(jl->j_commit_flushing), 1) ; 
-
 
+  /* write any buffers that must hit disk before the commit is done */
+  while(!list_empty(&jl->j_ordered_bh_list)) {
+      unlock_kernel();
+      fsync_buffers_list(&jl->j_ordered_bh_list);
+      lock_kernel();
+  }
   if (jl->j_len > SB_JOURNAL_TRANS_MAX(s)) {
-    reiserfs_panic(s, "journal-512: flush_commit_list: length is %lu, list number %d\n", jl->j_len, jl - SB_JOURNAL_LIST(s)) ;
+    reiserfs_panic(s, "journal-512: flush_commit_list: length is %lu, trans_id %lu\n", jl->j_len, jl->j_trans_id) ;
     return 0 ;
   }
 
@@ -699,7 +799,7 @@ reiserfs_panic(s, "journal-539: flush_co
       if (buffer_dirty(tbh)) {
 	reiserfs_warning(s, "journal-569: flush_commit_list, block already dirty!\n") ;
       } else {				
-	mark_buffer_dirty(tbh) ;
+	atomic_set_buffer_dirty(tbh);
       }
       ll_rw_block(WRITE, 1, &tbh) ;
       count++ ;
@@ -743,14 +843,21 @@ reiserfs_panic(s, "journal-539: flush_co
   atomic_dec(&(jl->j_commit_left)) ;
   bforget(jl->j_commit_bh) ;
 
+  if (SB_JOURNAL(s)->j_last_commit_id != 0 && 
+     (jl->j_trans_id - SB_JOURNAL(s)->j_last_commit_id) != 1) {
+      reiserfs_warning(s, "clm-2200: dev %s, last commit %lu, current %lu\n",
+                       kdevname(s->s_dev), SB_JOURNAL(s)->j_last_commit_id,
+		       jl->j_trans_id);
+  }
+  SB_JOURNAL(s)->j_last_commit_id = jl->j_trans_id;
+
   /* now, every commit block is on the disk.  It is safe to allow blocks freed during this transaction to be reallocated */
   cleanup_freed_for_journal_list(s, jl) ;
 
   if (flushall) {
     atomic_set(&(jl->j_older_commits_done), 1) ;
   }
-  atomic_set(&(jl->j_commit_flushing), 0) ;
-  wake_up(&(jl->j_commit_wait)) ;
+  up(&jl->j_commit_lock);
 
   return 0 ;
 }
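
The down()/journal_list_still_alive() pairing above replaces the old
sleep_on()-based j_commit_flushing flag: take the per-list semaphore, then
revalidate the list by transaction id, because it may have been flushed and
recycled while we slept on the lock.  A minimal userspace sketch of that
lock-then-revalidate shape (names invented; the question of who keeps jl
allocated is ignored here):

    #include <pthread.h>

    struct jlist {
        pthread_mutex_t commit_lock;   /* plays the role of j_commit_lock */
        unsigned long   trans_id;      /* 0 once the list has been freed */
    };

    /* returns 1 if we flushed, 0 if the list died before we got the lock */
    static int flush_serialized(struct jlist *jl, unsigned long expected_id)
    {
        pthread_mutex_lock(&jl->commit_lock);
        if (jl->trans_id != expected_id) {   /* revalidate under the lock */
            pthread_mutex_unlock(&jl->commit_lock);
            return 0;
        }
        /* ... safe to write commit blocks: no concurrent flusher ... */
        pthread_mutex_unlock(&jl->commit_lock);
        return 1;
    }
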
@@ -850,20 +957,25 @@ static int update_journal_header_block(s
 ** flush any and all journal lists older than you are 
 ** can only be called from flush_journal_list
 */
-static int flush_older_journal_lists(struct super_block *p_s_sb, struct reiserfs_journal_list *jl, unsigned long trans_id) {
-  int i, index ;
+static int flush_older_journal_lists(struct super_block *p_s_sb, 
+                                     struct reiserfs_journal_list *jl)
+{
+    struct list_head *entry;
   struct reiserfs_journal_list *other_jl ;
+    unsigned long trans_id = jl->j_trans_id;
 
-  index = jl - SB_JOURNAL_LIST(p_s_sb) ;
-  for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) {
-    other_jl = SB_JOURNAL_LIST(p_s_sb) + ((index + i) % JOURNAL_LIST_COUNT) ;
-    if (other_jl && other_jl->j_len > 0 && 
-        other_jl->j_trans_id > 0 && 
-	other_jl->j_trans_id < trans_id && 
-        other_jl != jl) {
+    /* we know we are the only ones flushing things, no extra race
+     * protection is required.
+     */
+restart:
+    entry = SB_JOURNAL(p_s_sb)->j_journal_list.next;
+    other_jl = JOURNAL_LIST_ENTRY(entry);
+    if (other_jl->j_trans_id < trans_id) {
       /* do not flush all */
       flush_journal_list(p_s_sb, other_jl, 0) ; 
-    }
+
+	/* other_jl is now deleted from the list */
+	goto restart;
   }
   return 0 ;
 }
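
flush_journal_list() unlinks the entry it flushes, so the loop above cannot
keep a cursor across the call; it re-reads the list head and restarts
instead.  A stand-alone sketch of the same idiom, with a minimal
re-implementation of the list_head helpers (invented names):

    #include <stddef.h>

    struct list_head { struct list_head *next, *prev; };

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct jl { unsigned long trans_id; struct list_head list; };

    static void list_del(struct list_head *e)
    {
        e->prev->next = e->next;
        e->next->prev = e->prev;
    }

    /* stand-in for flush_journal_list(): do the work, then unlink */
    static void flush_one(struct jl *j)
    {
        list_del(&j->list);
    }

    /* flush everything older than trans_id, restarting from the head
     * after each flush because the entry we held was just removed */
    static void flush_older(struct list_head *head, unsigned long trans_id)
    {
    restart:
        if (head->next != head) {
            struct jl *j = container_of(head->next, struct jl, list);
            if (j->trans_id < trans_id) {
                flush_one(j);
                goto restart;
            }
        }
    }
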
@@ -878,14 +990,23 @@ static void reiserfs_end_buffer_io_sync(
     put_bh(bh) ;
 }
 static void submit_logged_buffer(struct buffer_head *bh) {
-    lock_buffer(bh) ;
     get_bh(bh) ;
     bh->b_end_io = reiserfs_end_buffer_io_sync ;
     mark_buffer_notjournal_new(bh) ;
     clear_bit(BH_Dirty, &bh->b_state) ;
+    if (!buffer_uptodate(bh))
+        BUG();
     submit_bh(WRITE, bh) ;
 }
 
+static void del_from_work_list(struct super_block *s, 
+                               struct reiserfs_journal_list *jl) {
+    if (!list_empty(&jl->j_working_list)) {
+        list_del_init(&jl->j_working_list);
+	SB_JOURNAL(s)->j_num_work_lists--;
+    }
+}
+
 /* flush a journal list, both commit and real blocks
 **
 ** always set flushall to 1, unless you are calling from inside
@@ -906,29 +1027,27 @@ static int flush_journal_list(struct sup
   unsigned long j_len_saved = jl->j_len ;
 
   if (j_len_saved <= 0) {
-    return 0 ;
+    BUG();
   }
 
   if (atomic_read(&SB_JOURNAL(s)->j_wcount) != 0) {
     reiserfs_warning(s, "clm-2048: flush_journal_list called with wcount %d\n",
                       atomic_read(&SB_JOURNAL(s)->j_wcount)) ;
   }
-  /* if someone is getting the commit list, we must wait for them */
-  while (atomic_read(&(jl->j_commit_flushing))) { 
-    sleep_on(&(jl->j_commit_wait)) ;
-  }
-  /* if someone is flushing this list, we must wait for them */
-  while (atomic_read(&(jl->j_flushing))) {
-    sleep_on(&(jl->j_flush_wait)) ;
-  }
 
-  /* this list is now ours, we can change anything we want */
-  atomic_set(&(jl->j_flushing), 1) ;
+  if (jl->j_trans_id == 0)
+      BUG();
+
+  /* if flushall == 0, the lock is already held */
+  if (flushall) {
+      down(&SB_JOURNAL(s)->j_flush_sem);
+  } else if (!down_trylock(&SB_JOURNAL(s)->j_flush_sem)) {
+      BUG();
+  }
 
   count = 0 ;
   if (j_len_saved > SB_JOURNAL_TRANS_MAX(s)) {
-    reiserfs_panic(s, "journal-715: flush_journal_list, length is %lu, list number %d\n", j_len_saved, jl - SB_JOURNAL_LIST(s)) ;
-    atomic_dec(&(jl->j_flushing)) ;
+    reiserfs_panic(s, "journal-715: flush_journal_list, length is %lu, transid %lu\n", j_len_saved, jl->j_trans_id) ;
     return 0 ;
   }
 
@@ -978,13 +1097,13 @@ static int flush_journal_list(struct sup
       get_bh(saved_bh) ;
 
       if (buffer_journal_dirty(saved_bh)) {
+	if (!can_dirty(cn))
+	    BUG();
         was_jwait = 1 ;
-	mark_buffer_notjournal_dirty(saved_bh) ;
-        /* undo the inc from journal_mark_dirty */
-	put_bh(saved_bh) ;
-      }
-      if (can_dirty(cn)) {
-        was_dirty = 1 ;
+	was_dirty = 1;
+      } else if (can_dirty(cn)) {
+	  /* everything with !pjl && jwait should be writable */
+          BUG();
       }
     }
 
@@ -992,6 +1111,7 @@ static int flush_journal_list(struct sup
     ** sure they are committed, and don't try writing it to disk
     */
     if (pjl) {
+      if (atomic_read(&pjl->j_commit_left))
       flush_commit_list(s, pjl, 1) ;
       goto free_cnode ;
     }
@@ -1026,7 +1146,12 @@ reiserfs_warning(s, "journal-813: BAD! b
       /* we inc again because saved_bh gets decremented at free_cnode */
       get_bh(saved_bh) ;
       set_bit(BLOCK_NEEDS_FLUSH, &cn->state) ;
+      lock_buffer(saved_bh);
       submit_logged_buffer(saved_bh) ;
+      if (cn->blocknr != saved_bh->b_blocknr) {
+printk("cn %lu does not match bh %lu\n", cn->blocknr, saved_bh->b_blocknr);
+      BUG();
+      }
       count++ ;
     } else {
       reiserfs_warning(s, "clm-2082: Unable to flush buffer %lu in flush_journal_list\n",
@@ -1054,9 +1179,23 @@ free_cnode:
 	if (!cn->bh) {
 	  reiserfs_panic(s, "journal-1012: cn->bh is NULL\n") ;
 	}
+        if (cn->blocknr != cn->bh->b_blocknr) {
+printk("2cn %lu does not match bh %lu\n", cn->blocknr, cn->bh->b_blocknr);
+	    BUG();
+        }
 	if (!buffer_uptodate(cn->bh)) {
-	  reiserfs_panic(s, "journal-949: buffer write failed\n") ;
+	  reiserfs_panic(s, "journal-949: buffer %lu write failed\n", cn->bh->b_blocknr) ;
 	}
+
+	/* note, we must clear the JDirty_wait bit after the up to date
+	** check, otherwise we race against our flushpage routine
+	*/
+	if (!test_and_clear_bit(BH_JDirty_wait, &cn->bh->b_state))
+	    BUG();
+
+        /* undo the inc from journal_mark_dirty */
+	put_bh(cn->bh) ;
+
 	refile_buffer(cn->bh) ;
         brelse(cn->bh) ;
       }
@@ -1071,7 +1210,7 @@ flush_older_and_return:
   ** replayed after a crash
   */
   if (flushall) {
-    flush_older_journal_lists(s, jl, jl->j_trans_id) ;
+    flush_older_journal_lists(s, jl);
   } 
   
   /* before we can remove everything from the hash tables for this 
@@ -1086,49 +1225,83 @@ flush_older_and_return:
     update_journal_header_block(s, (jl->j_start + jl->j_len + 2) % SB_ONDISK_JOURNAL_SIZE(s), jl->j_trans_id) ;
   }
   remove_all_from_journal_list(s, jl, 0) ;
+  list_del(&jl->j_list);
+  SB_JOURNAL(s)->j_num_lists--;
+  del_from_work_list(s, jl);
+
+  if (SB_JOURNAL(s)->j_last_flush_id != 0 && 
+     (jl->j_trans_id - SB_JOURNAL(s)->j_last_flush_id) != 1) {
+      reiserfs_warning(s, "clm-2201: dev %s, last flush %lu, current %lu\n",
+                       kdevname(s->s_dev), SB_JOURNAL(s)->j_last_flush_id,
+		       jl->j_trans_id);
+  }
+  SB_JOURNAL(s)->j_last_flush_id = jl->j_trans_id;
+
+  /* not strictly required since we are freeing the list, but it should
+   * help find code using dead lists later on
+   */
   jl->j_len = 0 ;
   atomic_set(&(jl->j_nonzerolen), 0) ;
   jl->j_start = 0 ;
   jl->j_realblock = NULL ;
   jl->j_commit_bh = NULL ;
   jl->j_trans_id = 0 ;
-  atomic_dec(&(jl->j_flushing)) ;
-  wake_up(&(jl->j_flush_wait)) ;
+  jl->j_state = 0;
+
+  if (!list_empty(&jl->j_ordered_bh_list))
+      BUG();
+
+  if (!list_empty(&jl->j_tail_bh_list))
+      BUG();
+
+  // kmem_cache_free(journal_list_cachep, jl);
+  reiserfs_kfree(jl, sizeof(struct reiserfs_journal_list), s);
+
+  if (flushall)
+      up(&SB_JOURNAL(s)->j_flush_sem);
   return 0 ;
 } 
 
 
-static int kupdate_one_transaction(struct super_block *s,
-                                    struct reiserfs_journal_list *jl) 
+#define CHUNK_SIZE 32
+struct buffer_chunk {
+    struct buffer_head *bh[CHUNK_SIZE];
+    int nr;
+};
+
+static void write_chunk(struct buffer_chunk *chunk) {
+    int i;
+    for (i = 0; i < chunk->nr ; i++) {
+	submit_logged_buffer(chunk->bh[i]) ;
+    }
+    chunk->nr = 0;
+}
+
+static void add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh) {
+    if (chunk->nr >= CHUNK_SIZE)
+        BUG();
+    chunk->bh[chunk->nr++] = bh;
+    if (chunk->nr >= CHUNK_SIZE)
+        write_chunk(chunk);
+}
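
write_chunk()/add_to_chunk() are a small I/O batching idiom: fill a
fixed-size array, auto-submit when it fills, and let the caller drain
whatever is left at the end (as kupdate_transactions() does with its final
chunk.nr check).  A runnable stand-alone analogue, with printf standing in
for submit_logged_buffer():

    #include <stdio.h>

    #define CHUNK_SIZE 32
    struct chunk { int items[CHUNK_SIZE]; int nr; };

    static void write_chunk(struct chunk *c)
    {
        for (int i = 0; i < c->nr; i++)
            printf("submit %d\n", c->items[i]);
        c->nr = 0;
    }

    static void add_to_chunk(struct chunk *c, int item)
    {
        c->items[c->nr++] = item;
        if (c->nr == CHUNK_SIZE)
            write_chunk(c);          /* auto-flush on a full chunk */
    }

    int main(void)
    {
        struct chunk c = { .nr = 0 };
        for (int i = 0; i < 100; i++)
            add_to_chunk(&c, i);     /* three full chunks go out here */
        if (c.nr)
            write_chunk(&c);         /* drain the partial remainder */
        return 0;
    }
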
+
+static int write_one_transaction(struct super_block *s,
+                                 struct reiserfs_journal_list *jl,
+				 struct buffer_chunk *chunk) 
 {
     struct reiserfs_journal_list *pjl ; /* previous list for this cn */
-    struct reiserfs_journal_cnode *cn, *walk_cn ;
-    unsigned long blocknr ;
-    int run = 0 ;
-    int orig_trans_id = jl->j_trans_id ;
-    struct buffer_head *saved_bh ; 
+    struct reiserfs_journal_cnode *cn;
     int ret = 0 ;
 
-    /* if someone is getting the commit list, we must wait for them */
-    while (atomic_read(&(jl->j_commit_flushing))) {
-        sleep_on(&(jl->j_commit_wait)) ;
-    }
-    /* if someone is flushing this list, we must wait for them */
-    while (atomic_read(&(jl->j_flushing))) {
-        sleep_on(&(jl->j_flush_wait)) ;
-    }
-    /* was it flushed while we slept? */
-    if (jl->j_len <= 0 || jl->j_trans_id != orig_trans_id) {
-        return 0 ;
+    jl->j_state |= LIST_TOUCHED;
+    if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) {
+	del_from_work_list(s, jl);
+        return 0;
     }
+    del_from_work_list(s, jl);
 
-    /* this list is now ours, we can change anything we want */
-    atomic_set(&(jl->j_flushing), 1) ;
-
-loop_start:
     cn = jl->j_realblock ;
     while(cn) {
-        saved_bh = NULL ;
         /* if the blocknr == 0, this has been cleared from the hash,
         ** skip it
         */
@@ -1140,27 +1313,71 @@ loop_start:
         ** it is allowed to send that buffer to disk
         */
         pjl = find_newer_jl_for_cn(cn) ;
-        if (run == 0 && !pjl && cn->bh && buffer_journal_dirty(cn->bh) &&
-            can_dirty(cn)) 
-        {
+        if (!pjl && cn->bh && buffer_journal_dirty(cn->bh) && can_dirty(cn)) {
             if (!test_bit(BH_JPrepared, &cn->bh->b_state)) {
+		struct buffer_head *tmp_bh;
+		/* we can race against journal_mark_freed when we try
+		 * to lock_buffer(cn->bh), so we have to inc the buffer
+		 * count, and recheck things after locking
+		 */
+		tmp_bh = cn->bh;
+		get_bh(tmp_bh);
                 set_bit(BLOCK_NEEDS_FLUSH, &cn->state) ;
-		submit_logged_buffer(cn->bh) ;
+		lock_buffer(tmp_bh);
+		if (cn->bh && buffer_journal_dirty(tmp_bh) && 
+		    !test_bit(BH_JPrepared, &tmp_bh->b_state)) 
+		{
+		    add_to_chunk(chunk, tmp_bh);
+		    ret++;
             } else {
-                /* someone else is using this buffer.  We can't 
-                ** send it to disk right now because they might
-                ** be changing/logging it.
+		    /* note, cn->bh might be null now */
+		    unlock_buffer(tmp_bh);
+		}
+		put_bh(tmp_bh);
+            }
+        } 
+next:
+        cn = cn->next ;
+	if (current->need_resched)
+	    schedule();
+    }
+    return ret ;
+}
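
The tmp_bh dance above — get_bh() before lock_buffer(), then a recheck of
the journal-dirty and prepared bits — is a pin-lock-recheck pattern: pin
the object so it cannot be freed, take its lock, and re-test the predicates
that made it interesting, since they may have changed while we waited.  A
userspace analogue with invented names:

    #include <pthread.h>
    #include <stdatomic.h>

    struct buf {
        atomic_int      refs;
        pthread_mutex_t lock;
        int             dirty;
    };

    static void submit_if_still_dirty(struct buf *b)
    {
        atomic_fetch_add(&b->refs, 1);      /* pin: b can't go away */
        pthread_mutex_lock(&b->lock);
        if (b->dirty) {                     /* recheck under the lock */
            b->dirty = 0;
            /* queue the write here; in the kernel the buffer stays
             * locked until the I/O completion handler unlocks it */
        }
        pthread_mutex_unlock(&b->lock);
        atomic_fetch_sub(&b->refs, 1);      /* unpin */
    }
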
+
+static int wait_one_transaction(struct super_block *s,
+                                    struct reiserfs_journal_list *jl) 
+{
+    struct reiserfs_journal_list *pjl ; /* previous list for this cn */
+    struct reiserfs_journal_cnode *cn, *walk_cn ;
+    unsigned long blocknr ;
+    struct buffer_head *saved_bh ; 
+    int ret = 0 ;
+
+    if (atomic_read(&jl->j_commit_left) != 0 || jl->j_len <= 0) {
+        BUG();
+    }
+    cn = jl->j_realblock ;
+    while(cn) {
+        saved_bh = NULL ;
+        /* if the blocknr == 0, this has been cleared from the hash,
+        ** skip it
                 */
-                ret = 1 ;
+        if (cn->blocknr == 0) {
+            goto next ;
             }
-        } else if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) {
+        /* look for a more recent transaction that logged this
+        ** buffer.  Only the most recent transaction with a buffer in
+        ** it is allowed to send that buffer to disk
+        */
+        pjl = find_newer_jl_for_cn(cn) ;
+        if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) {
             clear_bit(BLOCK_NEEDS_FLUSH, &cn->state) ;
             if (!pjl && cn->bh) {
                 wait_on_buffer(cn->bh) ;
-            }
             /* check again, someone could have logged while we scheduled */
             pjl = find_newer_jl_for_cn(cn) ;
 
+            }
             /* before the JDirty_wait bit is set, the 
             ** buffer is added to the hash list.  So, if we are
             ** run in the middle of a do_journal_end, we will notice
@@ -1207,59 +1424,182 @@ loop_start:
         } 
 next:
         cn = cn->next ;
+	if (current->need_resched)
+	    schedule();
+    }
+    return ret ;
+}
+
+static int kupdate_transactions(struct super_block *s,
+                                   struct reiserfs_journal_list *jl,
+				   struct reiserfs_journal_list **next_jl,
+				   unsigned long *next_trans_id,
+				   int num_blocks,
+				   int num_trans) {
+    int ret = 0;
+    int written = 0 ;
+    int transactions_flushed = 0;
+    unsigned long orig_trans_id = jl->j_trans_id;
+    struct reiserfs_journal_list *orig_jl = jl;
+    struct buffer_chunk chunk;
+    struct list_head *entry;
+    chunk.nr = 0;
+
+    down(&SB_JOURNAL(s)->j_flush_sem);
+    if (!journal_list_still_alive(s, orig_trans_id)) {
+	goto done;
     }
-    /* the first run through the loop sends all the dirty buffers to
-    ** ll_rw_block.
-    ** the second run through the loop does all the accounting
+
+    /* we've got j_flush_sem held, nobody is going to delete any
+     * of these lists out from underneath us
     */
-    if (run++ == 0) {
-        goto loop_start ;
+    while((num_trans && transactions_flushed < num_trans) || 
+          (!num_trans && written < num_blocks)) {
+
+	if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) ||
+	    atomic_read(&jl->j_commit_left))
+	{
+	    del_from_work_list(s, jl);
+	    break;
     }
-    atomic_set(&(jl->j_flushing), 0) ;
-    wake_up(&(jl->j_flush_wait)) ;
-    return ret ;
+	ret = write_one_transaction(s, jl, &chunk);
+
+	if (ret < 0)
+	    goto done;
+	transactions_flushed++;
+	written += ret;
+	entry = jl->j_list.next;
+
+	/* did we wrap? */
+	if (entry == &SB_JOURNAL(s)->j_journal_list) {
+	    break;
+        }
+	jl = JOURNAL_LIST_ENTRY(entry);
+
+	/* don't bother with older transactions */
+	if (jl->j_trans_id <= orig_trans_id)
+	    break;
+    }
+    if (chunk.nr) {
+        write_chunk(&chunk);
+    }
+
+    jl = orig_jl;
+    *next_jl = jl;
+    *next_trans_id = jl->j_trans_id;
+    ret = transactions_flushed;
+    while(transactions_flushed--) {
+
+	wait_one_transaction(s, jl);
+	entry = jl->j_list.next;
+	jl = JOURNAL_LIST_ENTRY(entry);
+
+	/* make sure we can really count */
+	if (jl->j_trans_id <= orig_trans_id && transactions_flushed > 0) {
+printk("flushing %s %lu, orig_trans_id was %lu\n", kdevname(s->s_dev), jl->j_trans_id, orig_trans_id);
+	    BUG();
+        }
+	*next_jl = jl;
+	*next_trans_id = jl->j_trans_id;
+    }
+
+done:
+    up(&SB_JOURNAL(s)->j_flush_sem);
+    return ret;
 }
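
kupdate_transactions() is deliberately two-phase: the first loop only
queues writes (through the chunk), and the wait_one_transaction() pass
afterwards blocks on completions, so the disk sees a deep queue rather than
a write/wait ping-pong per buffer.  The same shape sketched with POSIX AIO,
assuming the aiocbs are already filled in:

    #include <aio.h>
    #include <errno.h>

    /* phase 1: queue every write; phase 2: wait for all of them */
    static int flush_two_phase(struct aiocb *reqs, int n)
    {
        int i;
        for (i = 0; i < n; i++)
            if (aio_write(&reqs[i]) != 0)
                return -1;                  /* couldn't even queue it */
        for (i = 0; i < n; i++) {
            const struct aiocb *p = &reqs[i];
            while (aio_error(p) == EINPROGRESS)
                aio_suspend(&p, 1, NULL);   /* sleep until done */
        }
        return 0;
    }
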
+
+/* o_sync and fsync heavy applications tend to use 
+** all the journal list slots with tiny transactions.  These
+** trigger lots and lots of calls to update the header block, which
+** adds seeks and slows things down.
+** 
+** This function tries to clear out a large chunk of the journal lists
+** at once, which makes everything faster since only the newest journal
+** list updates the header block
+*/
+static int flush_used_journal_lists(struct super_block *s, 
+                                    struct reiserfs_journal_list *jl) {
+    unsigned long len = 0;
+    unsigned long cur_len;
+    int ret;
+    int i;
+    struct reiserfs_journal_list *tjl;
+    struct reiserfs_journal_list *flush_jl;
+    unsigned long trans_id;
+
+    flush_jl = tjl = jl;
+
+    /* flush for 256 transactions or 256 blocks, whichever comes first */
+    for(i = 0 ; i < 256 && len < 256 ; i++) {
+	if (atomic_read(&tjl->j_commit_left) || 
+	    tjl->j_trans_id < jl->j_trans_id) {
+	    break;
+	}
+	cur_len = atomic_read(&tjl->j_nonzerolen);
+	if (cur_len > 0) {
+	    tjl->j_state &= ~LIST_TOUCHED;
+	}
+	len += cur_len;
+	flush_jl = tjl;
+	if (tjl->j_list.next == &SB_JOURNAL(s)->j_journal_list)
+	    break;
+	tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next);
+    }
+    /* try to find a group of blocks we can flush across all the
+    ** transactions, but only bother if we've actually spanned 
+    ** across multiple lists
+    */
+    if (flush_jl != jl) {
+	ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
+    }
+    flush_journal_list(s, flush_jl, 1) ;  
+    return 0;
+}
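
The loop above batches under two caps at once — at most 256 transactions
and roughly 256 blocks — and only bothers with the big kupdate pass when
the batch actually spans more than one list.  The cap logic in isolation
(a sketch, not the kernel code):

    /* walk at most max_trans entries, stopping once the accumulated
     * length reaches max_blocks; returns how many entries to span */
    static int batch_span(const unsigned long *lens, int n,
                          int max_trans, unsigned long max_blocks)
    {
        unsigned long total = 0;
        int i;
        for (i = 0; i < n && i < max_trans && total < max_blocks; i++)
            total += lens[i];
        return i;
    }
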
+
+
 /* since we never give dirty buffers to bdflush/kupdate, we have to
 ** flush them ourselves.  This runs through the journal lists, finds
 ** old metadata in need of flushing and sends it to disk.
 ** this does not end transactions, commit anything, or free
 ** cnodes.
-**
-** returns the highest transaction id that was flushed last time
 */
 static unsigned long reiserfs_journal_kupdate(struct super_block *s) {
-    struct reiserfs_journal_list *jl ;
-    int i ;
-    int start ;
+    struct reiserfs_journal_list *jl, *next_jl;
+    unsigned long trans_id, next_trans_id;
     time_t age ;
-    int ret = 0 ;
 
-    start = SB_JOURNAL_LIST_INDEX(s) ;
+    jl = JOURNAL_WORK_ENTRY(SB_JOURNAL(s)->j_working_list.next);
 
-    /* safety check to prevent flush attempts during a mount */
-    if (start < 0) {
+restart:
+    /* kupdate transactions might not set next_trans_id, it must be
+     * initialized before each call 
+     */
+    next_trans_id = 0;
+    if (list_empty(&SB_JOURNAL(s)->j_working_list)) {
         return 0 ;
     }
-    i = (start + 1) % JOURNAL_LIST_COUNT ;
-    while(i != start) {
-        jl = SB_JOURNAL_LIST(s) + i  ;
-        age = CURRENT_TIME - jl->j_timestamp ;
-        if (jl->j_len > 0 && age >= JOURNAL_MAX_COMMIT_AGE && 
-            atomic_read(&(jl->j_nonzerolen)) > 0 &&
-            atomic_read(&(jl->j_commit_left)) == 0) {
+    trans_id = jl->j_trans_id;
 
-            if (jl->j_trans_id == SB_JOURNAL(s)->j_trans_id) {
-                break ;
-            }
-            /* if ret was already 1, we want to preserve that */
-            ret |= kupdate_one_transaction(s, jl) ;
+    /* check for race with the code that frees lists */
+    if (jl->j_trans_id == 0)
+        BUG();
+    age = CURRENT_TIME - jl->j_timestamp ;
+    if (age >= SB_JOURNAL_MAX_COMMIT_AGE(s) &&
+        atomic_read(&jl->j_nonzerolen) > 0 &&
+	atomic_read(&jl->j_commit_left) == 0)
+    {
+        if (kupdate_transactions(s, jl, &next_jl, &next_trans_id, 32, 32) < 0)
+	    return 0;
+	if (next_jl != JOURNAL_WORK_ENTRY(&SB_JOURNAL(s)->j_working_list) &&
+	    next_trans_id > trans_id) 
+	{
+	    if (journal_list_still_alive(s, next_trans_id)) {
+		jl = next_jl;
+		goto restart;
         } 
-        if (atomic_read(&(jl->j_nonzerolen)) > 0) {
-            ret |= 1 ;
         }
-        i = (i + 1) % JOURNAL_LIST_COUNT ;
     }
-    return ret ;
+    return 0;
 }
 
 /*
@@ -1303,6 +1643,12 @@ void remove_journal_hash(struct reiserfs
 }
 
 static void free_journal_ram(struct super_block *p_s_sb) {
+
+  // kmem_cache_free(journal_list_cachep, SB_JOURNAL(p_s_sb)->j_current_jl);
+  reiserfs_kfree(SB_JOURNAL(p_s_sb)->j_current_jl, 
+                 sizeof(struct reiserfs_journal_list), p_s_sb);
+  SB_JOURNAL(p_s_sb)->j_num_lists--;
+
   vfree(SB_JOURNAL(p_s_sb)->j_cnode_free_orig) ;
   free_list_bitmaps(p_s_sb, SB_JOURNAL(p_s_sb)->j_list_bitmap) ;
   free_bitmap_nodes(p_s_sb) ; /* must be after free_list_bitmaps */
@@ -1813,61 +2159,6 @@ start_log_replay:
   return 0 ;
 }
 
-
-struct reiserfs_journal_commit_task {
-  struct super_block *p_s_sb ;
-  int jindex ;
-  int wake_on_finish ; /* if this is one, we wake the task_done queue, if it
-                       ** is zero, we free the whole struct on finish
-		       */
-  struct reiserfs_journal_commit_task *self ;
-  struct wait_queue *task_done ;
-  struct tq_struct task ;
-} ;
-
-static void reiserfs_journal_commit_task_func(struct reiserfs_journal_commit_task *ct) {
-
-  struct reiserfs_journal_list *jl ;
-  jl = SB_JOURNAL_LIST(ct->p_s_sb) + ct->jindex ;
-
-  flush_commit_list(ct->p_s_sb, SB_JOURNAL_LIST(ct->p_s_sb) + ct->jindex, 1) ; 
-  reiserfs_kfree(ct->self, sizeof(struct reiserfs_journal_commit_task), ct->p_s_sb) ;
-}
-
-static void setup_commit_task_arg(struct reiserfs_journal_commit_task *ct,
-                                  struct super_block *p_s_sb, 
-				  int jindex) {
-  if (!ct) {
-    reiserfs_panic(NULL, "journal-1360: setup_commit_task_arg called with NULL struct\n") ;
-  }
-  ct->p_s_sb = p_s_sb ;
-  ct->jindex = jindex ;
-  ct->task_done = NULL ;
-  INIT_LIST_HEAD(&ct->task.list) ;
-  ct->task.sync = 0 ;
-  ct->task.routine = (void *)(void *)reiserfs_journal_commit_task_func ; 
-  ct->self = ct ;
-  ct->task.data = (void *)ct ;
-}
-
-static void commit_flush_async(struct super_block *p_s_sb, int jindex) {
-  struct reiserfs_journal_commit_task *ct ;
-  /* using GFP_NOFS, GFP_KERNEL could try to flush inodes, which will try
-  ** to start/join a transaction, which will deadlock
-  */
-  ct = reiserfs_kmalloc(sizeof(struct reiserfs_journal_commit_task), GFP_NOFS, p_s_sb) ;
-  if (ct) {
-    setup_commit_task_arg(ct, p_s_sb, jindex) ;
-    queue_task(&(ct->task), &reiserfs_commit_thread_tq);
-    wake_up(&reiserfs_commit_thread_wait) ;
-  } else {
-#ifdef CONFIG_REISERFS_CHECK
-    reiserfs_warning(p_s_sb, "journal-1540: kmalloc failed, doing sync commit\n") ;
-#endif
-    flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 1) ;
-  }
-}
-
 /*
 ** this is the commit thread.  It is started with kernel_thread on
 ** FS mount, and journal_release() waits for it to exit.
@@ -1895,25 +2186,73 @@ static int reiserfs_journal_commit_threa
   lock_kernel() ;
   while(1) {
 
-    while(TQ_ACTIVE(reiserfs_commit_thread_tq)) {
-      run_task_queue(&reiserfs_commit_thread_tq) ;
-    }
-    if (CURRENT_TIME - last_run > 5) {
+restart:
 	down(&kreiserfsd_sem);
 	list_for_each_safe(entry, safe, &kreiserfsd_supers) {
 	    s = list_entry(entry, struct super_block, 
 	                   u.reiserfs_sb.s_reiserfs_supers);    
 	    if (!(s->s_flags & MS_RDONLY)) {
+	    flush_async_commits(s);
+
+	    if (CURRENT_TIME - last_run > 5) {
 		reiserfs_flush_old_commits(s);
 	    }
+
+	    if (!list_empty(&SB_JOURNAL(s)->j_working_list)) {
+	        struct reiserfs_journal_list *jl, *tjl;
+		unsigned long trans_id ;
+		unsigned long start;
+		unsigned long cur_start;
+		unsigned long nfract = SB_ONDISK_JOURNAL_SIZE(s) / 4;
+		int ret;
+
+		jl = JOURNAL_WORK_ENTRY(SB_JOURNAL(s)->j_working_list.next);
+		cur_start = SB_JOURNAL(s)->j_start;
+		start = jl->j_start;
+
+		/* pretend the log doesn't actually wrap */
+		if (cur_start < start) {
+		    cur_start = cur_start + SB_ONDISK_JOURNAL_SIZE(s);
+		}
+
+		/* if the first transaction on the working list is more
+		 * than nfract blocks away from the current transaction start
+		 * or there are more than 32 working lists, start
+		 * a background flush
+		 */
+		if (cur_start - start > nfract || 
+		    SB_JOURNAL(s)->j_num_work_lists > 32) {
+		    tjl=JOURNAL_LIST_ENTRY(SB_JOURNAL(s)->j_journal_list.next);
+		    ret = kupdate_transactions(s, jl, &tjl, &trans_id,32,128);
+		}
+	    }
 	}
+    }
+    /* check again for new async commits that need tending */
+    list_for_each_safe(entry, safe, &kreiserfsd_supers) {
+	s = list_entry(entry, struct super_block, 
+		       u.reiserfs_sb.s_reiserfs_supers);    
+	if (!list_empty(&SB_JOURNAL(s)->j_journal_list)) {
+	    struct reiserfs_journal_list *jl;
+	    struct list_head *entry;
+
+	    /* last entry is the youngest, commit it and you get everything */
+	    entry = SB_JOURNAL(s)->j_journal_list.prev;
+	    jl = JOURNAL_LIST_ENTRY(entry);
+	    if (!atomic_read(&(jl->j_older_commits_done))) {
+		/* give new mounts a chance to come in */
 	up(&kreiserfsd_sem);
 	last_run = CURRENT_TIME;
+		wake_up_all(&reiserfs_commit_thread_done) ;
+		goto restart;
+	    }
     }
+    }
+    up(&kreiserfsd_sem);
+    last_run = CURRENT_TIME;
 
     /* if there aren't any more filesystems left, break */
     if (reiserfs_mounted_fs_count <= 0) {
-      run_task_queue(&reiserfs_commit_thread_tq) ;
       break ;
     }
     wake_up(&reiserfs_commit_thread_done) ;
@@ -1924,12 +2263,28 @@ static int reiserfs_journal_commit_threa
   return 0 ;
 }
 
-static void journal_list_init(struct super_block *p_s_sb) {
-  int i ;
-  for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) {
-    init_waitqueue_head(&(SB_JOURNAL_LIST(p_s_sb)[i].j_commit_wait)) ;
-    init_waitqueue_head(&(SB_JOURNAL_LIST(p_s_sb)[i].j_flush_wait)) ;
+static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
+{
+    struct reiserfs_journal_list *jl;
+retry:
+    // jl = (struct reiserfs_journal_list *)kmem_cache_alloc(journal_list_cachep, SLAB_NOFS);
+    jl = reiserfs_kmalloc(sizeof(struct reiserfs_journal_list), GFP_NOFS, s);
+    if (!jl) {
+	yield();
+	goto retry;
   }
+    memset(jl, 0, sizeof(*jl));
+    INIT_LIST_HEAD(&jl->j_list);
+    INIT_LIST_HEAD(&jl->j_working_list);
+    INIT_LIST_HEAD(&jl->j_ordered_bh_list);
+    INIT_LIST_HEAD(&jl->j_tail_bh_list);
+    sema_init(&jl->j_commit_lock, 1);
+    SB_JOURNAL(s)->j_num_lists++;
+    return jl;
+}
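
alloc_journal_list() is not allowed to fail — a transaction cannot proceed
without a journal list — so it retries forever, yielding the CPU between
attempts to give reclaim a chance.  The same retry shape in plain C, with
sched_yield() standing in for the kernel's yield():

    #include <stdlib.h>
    #include <sched.h>

    static void *alloc_or_retry(size_t sz)
    {
        void *p;
        while ((p = calloc(1, sz)) == NULL)
            sched_yield();
        return p;
    }
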
+
+static void journal_list_init(struct super_block *p_s_sb) {
+    SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb);
 }
 
 static int release_journal_dev( struct super_block *super,
@@ -2051,6 +2404,7 @@ int journal_init(struct super_block *p_s
     struct reiserfs_super_block * rs;
     struct reiserfs_journal_header *jh;
     struct reiserfs_journal *journal;
+    struct reiserfs_journal_list *jl;
 
     if (sizeof(struct reiserfs_journal_commit) != 4096 ||
 	sizeof(struct reiserfs_journal_desc) != 4096) {
@@ -2168,11 +2521,6 @@ int journal_init(struct super_block *p_s
     brelse (bhjh);
 
     SB_JOURNAL(p_s_sb)->j_list_bitmap_index = 0 ;
-    SB_JOURNAL_LIST_INDEX(p_s_sb) = -10000 ; /* make sure flush_old_commits does not try to flush a list while replay is on */
-
-    /* clear out the journal list array */
-    memset(SB_JOURNAL_LIST(p_s_sb), 0, 
-           sizeof(struct reiserfs_journal_list) * JOURNAL_LIST_COUNT) ; 
 
     journal_list_init(p_s_sb) ;
 
@@ -2180,8 +2528,6 @@ int journal_init(struct super_block *p_s
            JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)) ;
     memset(journal_writers, 0, sizeof(char *) * 512) ; /* debug code */
 
-    INIT_LIST_HEAD(&(SB_JOURNAL(p_s_sb)->j_dirty_buffers)) ;
-
     SB_JOURNAL(p_s_sb)->j_start = 0 ;
     SB_JOURNAL(p_s_sb)->j_len = 0 ;
     SB_JOURNAL(p_s_sb)->j_len_alloc = 0 ;
@@ -2192,6 +2538,9 @@ int journal_init(struct super_block *p_s
     SB_JOURNAL(p_s_sb)->j_first = NULL ;     
     init_waitqueue_head(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ;
     sema_init(&SB_JOURNAL(p_s_sb)->j_lock, 1);
+    sema_init(&SB_JOURNAL(p_s_sb)->j_flush_sem, 1);
+    INIT_LIST_HEAD (&SB_JOURNAL(p_s_sb)->j_journal_list);
+    INIT_LIST_HEAD (&SB_JOURNAL(p_s_sb)->j_working_list);
     
     SB_JOURNAL(p_s_sb)->j_trans_id = 10 ;  
     SB_JOURNAL(p_s_sb)->j_mount_id = 10 ; 
@@ -2204,8 +2553,9 @@ int journal_init(struct super_block *p_s
     SB_JOURNAL(p_s_sb)->j_cnode_used = 0 ;
     SB_JOURNAL(p_s_sb)->j_must_wait = 0 ;
     init_journal_hash(p_s_sb) ;
-    SB_JOURNAL_LIST(p_s_sb)[0].j_list_bitmap = get_list_bitmap(p_s_sb, SB_JOURNAL_LIST(p_s_sb)) ;
-    if (!(SB_JOURNAL_LIST(p_s_sb)[0].j_list_bitmap)) {
+    jl = SB_JOURNAL(p_s_sb)->j_current_jl;
+    jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl) ;
+    if (!jl->j_list_bitmap) {
 	reiserfs_warning(p_s_sb, "journal-2005, get_list_bitmap failed for journal list 0\n") ;
 	goto free_and_return;
     }
@@ -2213,8 +2563,6 @@ int journal_init(struct super_block *p_s
 	reiserfs_warning(p_s_sb, "Replay Failure, unable to mount\n") ;
 	goto free_and_return;
     }
-    /* once the read is done, we can set this where it belongs */
-    SB_JOURNAL_LIST_INDEX(p_s_sb) = 0 ; 
 
     if (reiserfs_dont_log (p_s_sb))
 	return 0;
@@ -2241,7 +2589,9 @@ free_and_return:
 */
 int journal_transaction_should_end(struct reiserfs_transaction_handle *th, int new_alloc) {
   time_t now = CURRENT_TIME ;
-  if (reiserfs_dont_log(th->t_super)) 
+
+  /* cannot restart while nested unless the parent allows it */
+  if (!reiserfs_restartable_handle(th) && th->t_refcount > 1)
     return 0 ;
   if ( SB_JOURNAL(th->t_super)->j_must_wait > 0 ||
        (SB_JOURNAL(th->t_super)->j_len_alloc + new_alloc) >= SB_JOURNAL_MAX_BATCH(th->t_super) || 
@@ -2250,6 +2600,45 @@ int journal_transaction_should_end(struc
        SB_JOURNAL(th->t_super)->j_cnode_free < (SB_JOURNAL_TRANS_MAX(th->t_super) * 3)) { 
     return 1 ;
   }
+
+  /* we are allowing them to continue in the current transaction, so
+  * we have to bump the blocks allocated now.
+  */
+  th->t_blocks_allocated += new_alloc;
+  SB_JOURNAL(th->t_super)->j_len_alloc += new_alloc;
+
+  return 0 ;
+}
+
+int 
+reiserfs_restart_transaction(struct reiserfs_transaction_handle *th, int num) {
+    int refcount = th->t_refcount ;
+    struct super_block *s = th->t_super ;
+    int flags = th->t_flags ;
+    int parent_flags = 0;
+    struct reiserfs_transaction_handle *saved_th = current->journal_info ;
+
+    /* if refcount is > 1, saved_th is the parent we've nested into, save
+    ** his flags as well.  So far, only intermezzo needs this, 99% of the
+    ** time it is horribly unsafe.
+    */
+    if (refcount > 1) {
+	if (!reiserfs_restartable_handle(saved_th)) {
+	    BUG() ;
+	}
+	th->t_refcount = 1; 
+	parent_flags = saved_th->t_flags ;
+    }
+    th->t_flags = 0 ;
+    journal_end(th, s, th->t_blocks_allocated) ;
+    journal_begin(th, s, num) ;
+    th->t_flags = flags; 
+    if (refcount > 1) {
+	current->journal_info = saved_th ;
+        th->t_refcount = refcount ;
+	memcpy(saved_th, th, sizeof(*th)) ;
+	saved_th->t_flags = parent_flags ;
+    }
   return 0 ;
 }
 
@@ -2279,6 +2668,37 @@ void reiserfs_wait_on_write_block(struct
                !test_bit(WRITERS_BLOCKED, &SB_JOURNAL(s)->j_state)) ;
 }
 
+static void queue_log_writer(struct super_block *s) {
+    set_bit(WRITERS_QUEUED, &SB_JOURNAL(s)->j_state);
+    sleep_on(&SB_JOURNAL(s)->j_join_wait);
+}
+
+static void wake_queued_writers(struct super_block *s) {
+    if (test_and_clear_bit(WRITERS_QUEUED, &SB_JOURNAL(s)->j_state)) {
+        wake_up(&SB_JOURNAL(s)->j_join_wait);
+    }
+}
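
queue_log_writer()/wake_queued_writers() pair a state bit with the join
wait queue so wakeups are only issued when somebody actually queued.  With
a condition variable the handshake looks roughly like this (userspace
analogue, invented names; a production version would loop on a real
predicate instead of the bare wait, which here mirrors the fire-and-forget
shape of sleep_on()):

    #include <pthread.h>

    static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  qwait = PTHREAD_COND_INITIALIZER;
    static int writers_queued;          /* the WRITERS_QUEUED bit */

    static void queue_writer(void)
    {
        pthread_mutex_lock(&qlock);
        writers_queued = 1;
        pthread_cond_wait(&qwait, &qlock);
        pthread_mutex_unlock(&qlock);
    }

    static void wake_writers(void)
    {
        pthread_mutex_lock(&qlock);
        if (writers_queued) {           /* only wake if someone queued */
            writers_queued = 0;
            pthread_cond_broadcast(&qwait);
        }
        pthread_mutex_unlock(&qlock);
    }
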
+
+static void let_transaction_grow(struct super_block *sb, 
+                                 unsigned long trans_id)
+{
+    unsigned long bcount = SB_JOURNAL(sb)->j_bcount;
+    while(1) {
+	yield();
+        while ((atomic_read(&SB_JOURNAL(sb)->j_wcount) > 0 ||
+	        atomic_read(&SB_JOURNAL(sb)->j_jlock)) && 
+	       SB_JOURNAL(sb)->j_trans_id == trans_id) {
+	    queue_log_writer(sb);
+	}
+	if (SB_JOURNAL(sb)->j_trans_id != trans_id)
+	    break;
+	if (bcount == SB_JOURNAL(sb)->j_bcount)
+	    break;
+	bcount = SB_JOURNAL(sb)->j_bcount;
+    }
+}
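
let_transaction_grow() waits only while the transaction is visibly growing:
j_bcount is bumped by every journal_begin(), so if a full pass around the
loop sees the same count, no new writers are joining and there is no point
waiting longer.  The sampling loop in isolation (a sketch; the volatile
counters stand in for the journal fields):

    #include <sched.h>

    static void wait_for_growth(volatile unsigned long *bcount,
                                volatile unsigned long *trans_id,
                                unsigned long watched_id)
    {
        unsigned long seen = *bcount;
        for (;;) {
            sched_yield();
            if (*trans_id != watched_id)
                break;          /* it committed; nothing to wait for */
            if (*bcount == seen)
                break;          /* no new joiners since last look */
            seen = *bcount;
        }
    }
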
+
+
 /* join == true if you must join an existing transaction.
 ** join == false if you can deal with waiting for others to finish
 **
@@ -2286,8 +2706,10 @@ void reiserfs_wait_on_write_block(struct
 ** expect to use in nblocks.
 */
 static int do_journal_begin_r(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb,unsigned long nblocks,int join) {
-  time_t now = CURRENT_TIME ;
+  time_t now ;
   int old_trans_id  ;
+  struct reiserfs_transaction_handle myth ;
+  int sched_count = 0;
 
   reiserfs_check_lock_depth("journal_begin") ;
   RFALSE( p_s_sb->s_flags & MS_RDONLY, 
@@ -2298,9 +2720,14 @@ static int do_journal_begin_r(struct rei
     return 0 ;
   }
   PROC_INFO_INC( p_s_sb, journal.journal_being );
+  /* set here for journal_join */
+  th->t_refcount = 1; 
+  th->t_flags = 0 ;
+  th->t_super = p_s_sb ;
 
 relock:
   lock_journal(p_s_sb) ;
+  SB_JOURNAL(p_s_sb)->j_bcount++ ;
 
   if (test_bit(WRITERS_BLOCKED, &SB_JOURNAL(p_s_sb)->j_state)) {
     unlock_journal(p_s_sb) ;
@@ -2308,6 +2735,7 @@ relock:
     PROC_INFO_INC( p_s_sb, journal.journal_relock_writers );
     goto relock ;
   }
+  now = CURRENT_TIME;
 
   /* if there is no room in the journal OR
   ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning 
@@ -2321,53 +2748,128 @@ relock:
      (!join && atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) ) ||
      (!join && SB_JOURNAL(p_s_sb)->j_cnode_free < (SB_JOURNAL_TRANS_MAX(p_s_sb) * 3))) {
 
+    old_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ;
     unlock_journal(p_s_sb) ; /* allow others to finish this transaction */
 
-    /* if writer count is 0, we can just force this transaction to end, and start
-    ** a new one afterwards.
+    if (!join && (SB_JOURNAL(p_s_sb)->j_len_alloc + nblocks + 2) >= 
+        SB_JOURNAL_MAX_BATCH(p_s_sb) && 
+	((SB_JOURNAL(p_s_sb)->j_len + nblocks + 2) * 100) < 
+	(SB_JOURNAL(p_s_sb)->j_len_alloc * 75))
+    {
+	if (atomic_read(&SB_JOURNAL(p_s_sb)->j_wcount) > 10) {
+	    sched_count++;
+	    queue_log_writer(p_s_sb);
+	    goto relock;
+	}
+    } 
+    /* don't mess with joining the transaction if all we have to do is
+     * wait for someone else to do a commit
     */
-    if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0) {
-      struct reiserfs_transaction_handle myth ;
+    if (atomic_read(&SB_JOURNAL(p_s_sb)->j_jlock)) {
+	while (SB_JOURNAL(p_s_sb)->j_trans_id == old_trans_id &&
+	       atomic_read(&SB_JOURNAL(p_s_sb)->j_jlock)) {
+	    queue_log_writer(p_s_sb);
+        }
+	goto relock;
+    }
       journal_join(&myth, p_s_sb, 1) ;
-      reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ;
-      journal_mark_dirty(&myth, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ;
-      do_journal_end(&myth, p_s_sb,1,COMMIT_NOW) ;
+  
+    /* someone might have ended the transaction while we joined */
+    if (old_trans_id != SB_JOURNAL(p_s_sb)->j_trans_id) {
+        do_journal_end(&myth, p_s_sb, 1, 0) ;
     } else {
-      /* but if the writer count isn't zero, we have to wait for the current writers to finish.
-      ** They won't batch on transaction end once we set j_jlock
-      */
-      atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 1) ;
-      old_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ;
-      while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) &&
-            SB_JOURNAL(p_s_sb)->j_trans_id == old_trans_id) {
-	sleep_on(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ;
-      }
+        do_journal_end(&myth, p_s_sb, 1, COMMIT_NOW) ;
     }
     PROC_INFO_INC( p_s_sb, journal.journal_relock_wcount );
     goto relock ;
   }
 
   if (SB_JOURNAL(p_s_sb)->j_trans_start_time == 0) { /* we are the first writer, set trans_id */
-    SB_JOURNAL(p_s_sb)->j_trans_start_time = now ;
+    SB_JOURNAL(p_s_sb)->j_trans_start_time = CURRENT_TIME;
   }
   atomic_inc(&(SB_JOURNAL(p_s_sb)->j_wcount)) ;
   SB_JOURNAL(p_s_sb)->j_len_alloc += nblocks ;
   th->t_blocks_logged = 0 ;
   th->t_blocks_allocated = nblocks ;
-  th->t_super = p_s_sb ;
   th->t_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ;
-  th->t_caller = "Unknown" ;
+  reiserfs_set_handle_active(th) ;
   unlock_journal(p_s_sb) ;
   return 0 ;
 }
 
+struct reiserfs_transaction_handle *
+reiserfs_persistent_transaction(struct super_block *s, unsigned long nblocks) {
+    int ret ;
+    struct reiserfs_transaction_handle *th ;
 
+    /* if we're nesting into an existing transaction, it will be
+    ** persistent on its own
+    */
+    if (reiserfs_transaction_running(s)) {
+        th = current->journal_info ;
+	th->t_refcount++ ;
+	if (th->t_refcount < 2) {
+	    BUG() ;
+	}
+	return th ;
+    }
+    th = reiserfs_kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS, s) ;
+    if (!th) {
+       return ERR_PTR(-ENOMEM) ;
+    }
+    ret = journal_begin(th, s, nblocks) ;
+    if (ret) {
+	reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle), s) ;
+        return ERR_PTR(ret) ;
+    }
+    /* do_journal_end is now responsible for freeing the handle */
+    reiserfs_set_handle_persistent(th) ;
+    return th ;
+}
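
Persistent handles ride on the same per-task nesting that journal_begin()
uses: current->journal_info plus t_refcount let inner code join the
transaction already open in this task instead of deadlocking by starting a
second one.  A compact userspace analogue, with a thread-local standing in
for the task struct field (invented names, error handling elided):

    #include <stdlib.h>

    struct handle { int refcount; /* ... transaction state ... */ };

    static __thread struct handle *cur_handle;  /* like journal_info */

    static struct handle *get_transaction(void)
    {
        if (cur_handle) {               /* already open: nest into it */
            cur_handle->refcount++;
            return cur_handle;
        }
        struct handle *h = calloc(1, sizeof(*h));
        h->refcount = 1;                /* outermost owner */
        cur_handle = h;
        return h;
    }

    static void put_transaction(struct handle *h)
    {
        if (--h->refcount > 0)          /* inner end: drop the ref */
            return;
        /* real commit work happens only for the outermost end */
        cur_handle = NULL;
        free(h);
    }
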
 static int journal_join(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) {
+  struct reiserfs_transaction_handle *cur_th = current->journal_info;
+
+  /* this keeps do_journal_end from NULLing out the current->journal_info
+  ** pointer
+  */
+  th->t_handle_save = cur_th ;
+  if (cur_th && cur_th->t_refcount > 1) {
+      BUG() ;
+  }
   return do_journal_begin_r(th, p_s_sb, nblocks, 1) ;
 }
 
 int journal_begin(struct reiserfs_transaction_handle *th, struct super_block  * p_s_sb, unsigned long nblocks) {
-  return do_journal_begin_r(th, p_s_sb, nblocks, 0) ;
+    struct reiserfs_transaction_handle *cur_th = current->journal_info ;
+    int ret ;
+
+    th->t_handle_save = NULL ;
+    if (cur_th) {
+	/* we are nesting into the current transaction */
+	if (cur_th->t_super == p_s_sb) {
+	      cur_th->t_refcount++ ;
+	      memcpy(th, cur_th, sizeof(*th)); 
+	      th->t_flags = 0 ;
+	      reiserfs_set_handle_active(th) ;
+	      if (th->t_refcount <= 1) 
+		      printk("BAD: refcount <= 1, but journal_info != 0\n"); 
+	      return 0;
+	} else {
+	    /* we've ended up with a handle from a different filesystem.
+	    ** save it and restore on journal_end.  This should never
+	    ** really happen...
+	    */
+	    reiserfs_warning(p_s_sb, "clm-2100: nesting into a different FS\n") ;
+	    th->t_handle_save = current->journal_info ;
+	    current->journal_info = th;
+	}
+    } else {
+	current->journal_info = th;
+    }
+    ret = do_journal_begin_r(th, p_s_sb, nblocks, 0) ;
+    if (current->journal_info != th)
+        BUG() ;
+    return ret ;
 }
 
 /* not used at all */
@@ -2423,6 +2925,7 @@ int journal_mark_dirty(struct reiserfs_t
 
   if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0) {
     reiserfs_warning(p_s_sb, "journal-1409: journal_mark_dirty returning because j_wcount was %d\n", atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount))) ;
+    BUG();
     return 1 ;
   }
   /* this error means I've screwed up, and we've overflowed the transaction.  
@@ -2489,25 +2992,36 @@ done:
   return 0 ;
 }
 
-/*
-** if buffer already in current transaction, do a journal_mark_dirty
-** otherwise, just mark it dirty and move on.  Used for writes to meta blocks
-** that don't need journaling
-*/
-int journal_mark_dirty_nolog(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, struct buffer_head *bh) {
-  if (reiserfs_dont_log(th->t_super) || buffer_journaled(bh) || 
-      buffer_journal_dirty(bh)) {
-    return journal_mark_dirty(th, p_s_sb, bh) ;
+int journal_end(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) {
+
+    int ret;
+    if (!current->journal_info && th->t_refcount > 1) 
+	printk("REISER-NESTING: th NULL, refcount %d\n", th->t_refcount); 
+    if (th->t_refcount > 1) { 
+	struct reiserfs_transaction_handle *cur_th = current->journal_info ;
+
+	/* we aren't allowed to close a nested transaction on a different
+	** filesystem from the one in the task struct
+	*/
+	if (cur_th->t_super != th->t_super)
+	    BUG() ;
+
+	th->t_refcount--;
+	if (th != cur_th) {
+	    int flags = cur_th->t_flags ;
+	    /* nested handles are never persistent */
+	    if (reiserfs_persistent_handle(th)) {
+		BUG() ;
   }
-  if (get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_list_hash_table, bh->b_dev,bh->b_blocknr,bh->b_size)) {
-    return journal_mark_dirty(th, p_s_sb, bh) ;
+	    memcpy(cur_th, th, sizeof(*th));
+	    th->t_flags = 0 ;
+	    cur_th->t_flags = flags ;
   }
-  mark_buffer_dirty(bh) ;
-  return 0 ;
-}
-
-int journal_end(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) {
-  return do_journal_end(th, p_s_sb, nblocks, 0) ;
+	ret = 0;
+    } else {
+	ret = do_journal_end(th, p_s_sb, nblocks, 0) ;
+    }
+    return ret;
 }
 
 /* removes from the current transaction, releasing and decrementing any counters.  
@@ -2610,6 +3124,10 @@ static int can_dirty(struct reiserfs_jou
 */
 int journal_end_sync(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) {
 
+  /* you are not allowed to sync while nested, very, very bad */
+  if (th->t_refcount > 1) {
+    BUG() ;
+  }
   if (SB_JOURNAL(p_s_sb)->j_len == 0) {
     reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ;
     journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ;
@@ -2634,12 +3152,14 @@ int show_reiserfs_locks(void) {
 **
 */
 void flush_async_commits(struct super_block *p_s_sb) {
-  int i ;
+  struct reiserfs_journal_list *jl;
+  struct list_head *entry;
 
-  for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) {
-    if (i != SB_JOURNAL_LIST_INDEX(p_s_sb)) {
-      flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + i, 1) ; 
-    }
+  if (!list_empty(&SB_JOURNAL(p_s_sb)->j_journal_list)) {
+      /* last entry is the youngest, commit it and you get everything */
+      entry = SB_JOURNAL(p_s_sb)->j_journal_list.prev;
+      jl = JOURNAL_LIST_ENTRY(entry);
+      flush_commit_list(p_s_sb, jl, 1);
   }
 }
 
@@ -2649,39 +3169,26 @@ void flush_async_commits(struct super_bl
 **
 */
 int reiserfs_flush_old_commits(struct super_block *p_s_sb) {
-  int i ;
-  int count = 0;
-  int start ; 
   time_t now ; 
   struct reiserfs_transaction_handle th ; 
 
-  start =  SB_JOURNAL_LIST_INDEX(p_s_sb) ;
   now = CURRENT_TIME ;
-
-  /* safety check so we don't flush while we are replaying the log during mount */
-  if (SB_JOURNAL_LIST_INDEX(p_s_sb) < 0) {
+    /* safety check so we don't flush while we are replaying the log during 
+     * mount 
+     */
+    if (list_empty(&SB_JOURNAL(p_s_sb)->j_journal_list)) {
     return 0  ;
   }
-  /* starting with oldest, loop until we get to the start */
-  i = (SB_JOURNAL_LIST_INDEX(p_s_sb) + 1) % JOURNAL_LIST_COUNT ;
-  while(i != start) {
-    if (SB_JOURNAL_LIST(p_s_sb)[i].j_len > 0 && ((now - SB_JOURNAL_LIST(p_s_sb)[i].j_timestamp) > SB_JOURNAL_MAX_COMMIT_AGE(p_s_sb))) {
-      /* we have to check again to be sure the current transaction did not change */
-      if (i != SB_JOURNAL_LIST_INDEX(p_s_sb))  {
-	flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + i, 1) ;
-      }
-    }
-    i = (i + 1) % JOURNAL_LIST_COUNT ;
-    count++ ;
-  }
 
-  /* now, check the current transaction.  If there are no writers, and it is too old, finish it, and
-  ** force the commit blocks to disk
+    /* check the current transaction.  If there are no writers, and it is 
+     * too old, finish it, and force the commit blocks to disk 
   */
   if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0 &&  
      SB_JOURNAL(p_s_sb)->j_trans_start_time > 0 && 
      SB_JOURNAL(p_s_sb)->j_len > 0 && 
-     (now - SB_JOURNAL(p_s_sb)->j_trans_start_time) > SB_JOURNAL_MAX_TRANS_AGE(p_s_sb)) {
+        (now - SB_JOURNAL(p_s_sb)->j_trans_start_time) > 
+	SB_JOURNAL_MAX_TRANS_AGE(p_s_sb)) 
+    {
     journal_join(&th, p_s_sb, 1) ;
     reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ;
     journal_mark_dirty(&th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ;
@@ -2714,6 +3220,7 @@ static int check_journal_end(struct reis
   int flush = flags & FLUSH_ALL ;
   int commit_now = flags & COMMIT_NOW ;
   int wait_on_commit = flags & WAIT ;
+  struct reiserfs_journal_list *jl;
 
   if (th->t_trans_id != SB_JOURNAL(p_s_sb)->j_trans_id) {
     reiserfs_panic(th->t_super, "journal-1577: handle trans id %ld != current trans id %ld\n", 
@@ -2732,8 +3239,9 @@ static int check_journal_end(struct reis
   if (SB_JOURNAL(p_s_sb)->j_len == 0) {
     int wcount = atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) ;
     unlock_journal(p_s_sb) ;
+    BUG();
     if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) > 0 && wcount <= 0) {
-      atomic_dec(&(SB_JOURNAL(p_s_sb)->j_jlock)) ;
+      atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ;
       wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ;
     }
     return 0 ;
@@ -2746,7 +3254,11 @@ static int check_journal_end(struct reis
   */
   if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) > 0) {
     if (flush || commit_now) {
-      int orig_jindex = SB_JOURNAL_LIST_INDEX(p_s_sb) ;
+      unsigned trans_id ;
+
+      jl = SB_JOURNAL(p_s_sb)->j_current_jl;
+      trans_id = jl->j_trans_id;
+
       atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 1) ;
       if (flush) {
         SB_JOURNAL(p_s_sb)->j_next_full_flush = 1 ;
@@ -2752,18 +3264,27 @@ static int check_journal_end(struct reis
         SB_JOURNAL(p_s_sb)->j_next_full_flush = 1 ;
       }
       unlock_journal(p_s_sb) ;
+
       /* sleep while the current transaction is still j_jlocked */
-      while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) && 
-            SB_JOURNAL(p_s_sb)->j_trans_id == th->t_trans_id) {
-	sleep_on(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ;
-      }
-      if (commit_now) {
-	if (wait_on_commit) {
-	  flush_commit_list(p_s_sb,  SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ;
+      while(SB_JOURNAL(p_s_sb)->j_trans_id == trans_id) {
+	if (atomic_read(&SB_JOURNAL(p_s_sb)->j_jlock)) {
+	    queue_log_writer(p_s_sb);
 	} else {
-	  commit_flush_async(p_s_sb, orig_jindex) ; 
+	    lock_journal(p_s_sb);
+	    if (SB_JOURNAL(p_s_sb)->j_trans_id == trans_id) {
+	        atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 1) ;
+	    } 
+	    unlock_journal(p_s_sb);
 	}
       }
+      if (SB_JOURNAL(p_s_sb)->j_trans_id == trans_id) {
+          BUG();
+      }
+      if (commit_now && journal_list_still_alive(p_s_sb, trans_id) &&
+          wait_on_commit) 
+      {
+	  flush_commit_list(p_s_sb, jl, 1) ;
+      }
       return 0 ;
     } 
     unlock_journal(p_s_sb) ;
@@ -2781,8 +3302,8 @@ static int check_journal_end(struct reis
   if (!(SB_JOURNAL(p_s_sb)->j_must_wait > 0) && !(atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock))) && !flush && !commit_now && 
       (SB_JOURNAL(p_s_sb)->j_len < SB_JOURNAL_MAX_BATCH(p_s_sb))  && 
       SB_JOURNAL(p_s_sb)->j_len_alloc < SB_JOURNAL_MAX_BATCH(p_s_sb) && SB_JOURNAL(p_s_sb)->j_cnode_free > (SB_JOURNAL_TRANS_MAX(p_s_sb) * 3)) {
-    SB_JOURNAL(p_s_sb)->j_bcount++ ;
     unlock_journal(p_s_sb) ;
+
     return 0 ;
   }
 
@@ -2812,16 +3333,13 @@ int journal_mark_freed(struct reiserfs_t
   struct reiserfs_list_bitmap *jb = NULL ;
   int cleaned = 0 ;
   
-  if (reiserfs_dont_log(th->t_super)) {
-    bh = sb_get_hash_table(p_s_sb, blocknr) ;
-    if (bh && buffer_dirty (bh)) {
-      reiserfs_warning (p_s_sb, "journal_mark_freed(dont_log): dirty buffer on hash list: %lx %ld\n", bh->b_state, blocknr);
-      BUG ();
-    }
-    brelse (bh);
-    return 0 ;
+  cn = get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_hash_table, p_s_sb->s_dev,
+                                       blocknr, p_s_sb->s_blocksize) ;
+  if (cn && cn->bh) {
+      bh = cn->bh ;
+      get_bh(bh) ;
   }
-  bh = sb_get_hash_table(p_s_sb, blocknr) ;
+
   /* if it is journal new, we just remove it from this transaction */
   if (bh && buffer_journal_new(bh)) {
     mark_buffer_notjournal_new(bh) ;
@@ -2829,14 +3347,22 @@ int journal_mark_freed(struct reiserfs_t
     cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned) ;
   } else {
     /* set the bit for this block in the journal bitmap for this transaction */
-    jb = SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_list_bitmap ;
+    jb = SB_JOURNAL(p_s_sb)->j_current_jl->j_list_bitmap;
     if (!jb) {
       reiserfs_panic(p_s_sb, "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n") ;
     }
+
+    /* we set bits in the list bitmap so the block won't be reallocated
+     * as a data block which might get flushed before this transaction 
+     * commits.  When data logging is on, the block might get reallocated
+     * as a data block, but we know the data block won't get flushed before
+     * we commit
+     */
+    if (!reiserfs_data_log(p_s_sb)) {
     set_bit_in_list_bitmap(p_s_sb, blocknr, jb) ;
+    } 
 
     /* Note, the entire while loop is not allowed to schedule.  */
-
     if (bh) {
       clear_prepared_bits(bh) ;
     }
@@ -2881,57 +3407,77 @@ int journal_mark_freed(struct reiserfs_t
 
 void reiserfs_update_inode_transaction(struct inode *inode) {
   
-  inode->u.reiserfs_i.i_trans_index = SB_JOURNAL_LIST_INDEX(inode->i_sb);
-
+  inode->u.reiserfs_i.i_jl = SB_JOURNAL(inode->i_sb)->j_current_jl;
   inode->u.reiserfs_i.i_trans_id = SB_JOURNAL(inode->i_sb)->j_trans_id ;
 }
 
 void reiserfs_update_tail_transaction(struct inode *inode) {
   
-  inode->u.reiserfs_i.i_tail_trans_index = SB_JOURNAL_LIST_INDEX(inode->i_sb);
-
+  inode->u.reiserfs_i.i_tail_jl = SB_JOURNAL(inode->i_sb)->j_current_jl;
   inode->u.reiserfs_i.i_tail_trans_id = SB_JOURNAL(inode->i_sb)->j_trans_id ;
 }
 
-static void __commit_trans_index(struct inode *inode, unsigned long id,
-                                 unsigned long index) 
+static void __commit_trans_jl(struct inode *inode, unsigned long id,
+                                 struct reiserfs_journal_list *jl) 
 {
-    struct reiserfs_journal_list *jl ;
     struct reiserfs_transaction_handle th ;
     struct super_block *sb = inode->i_sb ;
 
-    jl = SB_JOURNAL_LIST(sb) + index;
-
     /* is it from the current transaction, or from an unknown transaction? */
     if (id == SB_JOURNAL(sb)->j_trans_id) {
-	journal_join(&th, sb, 1) ;
+	jl = SB_JOURNAL(sb)->j_current_jl;
+	/* try to let other writers come in and grow this transaction */
+	let_transaction_grow(sb, id);
+	if (SB_JOURNAL(sb)->j_trans_id != id) {
+	    goto flush_commit_only;
+	}
+
+	journal_begin(&th, sb, 1) ;
+
+	/* someone might have ended this transaction while we joined */
+	if (SB_JOURNAL(sb)->j_trans_id != id) {
+	    reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), 1) ;
+	    journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb)) ;
+	    journal_end(&th, sb, 1) ;
+	    goto flush_commit_only;
+	}
+
 	journal_end_sync(&th, sb, 1) ;
-    } else if (jl->j_trans_id == id) {
+	
+    } else {
+	/* this gets tricky, we have to make sure the journal list in
+	 * the inode still exists.  We know the list is still around
+	 * if we've got a larger transaction id than the oldest list
+	 */
+flush_commit_only:
+	if (journal_list_still_alive(inode->i_sb, id)) {
 	flush_commit_list(sb, jl, 1) ;
     }
-    /* if the transaction id does not match, this list is long since flushed
-    ** and we don't have to do anything here
-    */
+    }
+    /* otherwise the list is gone, and long since committed */
 }
 void reiserfs_commit_for_tail(struct inode *inode) {
     unsigned long id = inode->u.reiserfs_i.i_tail_trans_id;
-    unsigned long index = inode->u.reiserfs_i.i_tail_trans_index;
+    struct reiserfs_journal_list *jl = inode->u.reiserfs_i.i_tail_jl;
 
     /* for tails, if this info is unset there's nothing to commit */
-    if (id && index)
-	__commit_trans_index(inode, id, index);
+    if (id && jl)
+	__commit_trans_jl(inode, id, jl);
 }
 void reiserfs_commit_for_inode(struct inode *inode) {
     unsigned long id = inode->u.reiserfs_i.i_trans_id;
-    unsigned long index = inode->u.reiserfs_i.i_trans_index;
+    struct reiserfs_journal_list *jl = inode->u.reiserfs_i.i_jl;
 
-    /* for the whole inode, assume unset id or index means it was
+    /* for the whole inode, assume unset id means it was
      * changed in the current transaction.  More conservative
      */
-    if (!id || !index)
+    if (!id || !jl) {
 	reiserfs_update_inode_transaction(inode) ;
+	id = inode->u.reiserfs_i.i_trans_id;
+	/* jl will be updated in __commit_trans_jl */
+    }
 
-    __commit_trans_index(inode, id, index);
+    __commit_trans_jl(inode, id, jl);
 }
 
 void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb, 
@@ -2959,8 +3505,6 @@ void reiserfs_prepare_for_journal(struct
   int retry_count = 0 ;
 
   PROC_INFO_INC( p_s_sb, journal.prepare );
-  if (reiserfs_dont_log (p_s_sb))
-    return;
 
   while(!test_bit(BH_JPrepared, &bh->b_state) ||
         (wait && buffer_locked(bh))) {
@@ -2972,13 +3517,33 @@ void reiserfs_prepare_for_journal(struct
     if (wait) {
       RFALSE( buffer_locked(bh) && cur_tb != NULL,
 	      "waiting while do_balance was running\n") ;
+      /* only data buffers are allowed to come in dirty, and they 
+       * never get run through restore_prepared_buffer.  So we can
+       * just mark them clean here and know it is safe
+       */
+      mark_buffer_clean(bh);
       wait_on_buffer(bh) ;
     }
     PROC_INFO_INC( p_s_sb, journal.prepare_retry );
     retry_count++ ;
   }
 }
-
+static void flush_old_journal_lists(struct super_block *s) {
+    struct reiserfs_journal_list *jl;
+    struct list_head *entry;
+    time_t now = CURRENT_TIME;
+
+    while(!list_empty(&SB_JOURNAL(s)->j_journal_list)) {
+        entry = SB_JOURNAL(s)->j_journal_list.next;
+	jl = JOURNAL_LIST_ENTRY(entry);
+	/* this check should always be run, to send old lists to disk */
+	if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) {
+	    flush_used_journal_lists(s, jl);
+	} else {
+	    break;
+	}
+    }
+}
 /* 
 ** long and ugly.  If flush, will not return until all commit
 ** blocks and all real buffers in the trans are on disk.
@@ -2995,17 +3560,29 @@ static int do_journal_end(struct reiserf
   struct buffer_head *c_bh ; /* commit bh */
   struct buffer_head *d_bh ; /* desc bh */
   int cur_write_start = 0 ; /* start index of current log write */
-  int cur_blocks_left = 0 ; /* number of journal blocks left to write */
   int old_start ;
   int i ;
-  int jindex ;
-  int orig_jindex ;
   int flush = flags & FLUSH_ALL ;
   int wait_on_commit = flags & WAIT ;
   struct reiserfs_super_block *rs ; 
+  struct reiserfs_journal_list *jl, *temp_jl;
+  struct list_head *entry, *safe;
+  int wakeup_kreiserfsd = 0;
+  unsigned long jindex;
+  unsigned long commit_trans_id;
+
+  if (th->t_refcount > 1)
+    BUG() ;
 
+  reiserfs_check_lock_depth("journal end");
+  current->journal_info = th->t_handle_save;
   if (reiserfs_dont_log(th->t_super)) {
-    return 0 ;
+    goto out ;
+  }
+
+  if (SB_JOURNAL(p_s_sb)->j_len == 0) {
+      reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ;
+      journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ;
   }
 
   lock_journal(p_s_sb) ;
@@ -3023,7 +3600,8 @@ static int do_journal_end(struct reiserf
   */
   if (!check_journal_end(th, p_s_sb, nblocks, flags)) {
     p_s_sb->s_dirt = 1;
-    return 0 ;
+    wake_queued_writers(p_s_sb);
+    goto out ;
   }
 
   /* check_journal_end might set these, check again */
@@ -3039,8 +3617,11 @@ static int do_journal_end(struct reiserf
   }
 
 #ifdef REISERFS_PREALLOCATE
+  /* quota ops might need to nest, set up the journal_info pointer for them */
+  current->journal_info = th ;
   reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into
 				      * the transaction */
+  current->journal_info = th->t_handle_save ;
 #endif
   
   rs = SB_DISK_SUPER_BLOCK(p_s_sb) ;
@@ -3061,25 +3642,23 @@ static int do_journal_end(struct reiserf
   mark_buffer_uptodate(c_bh, 1) ;
 
   /* init this journal list */
-  atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_older_commits_done), 0) ;
-  SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ;
-  SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_timestamp = SB_JOURNAL(p_s_sb)->j_trans_start_time ;
-  SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_bh = c_bh ;
-  SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_start = SB_JOURNAL(p_s_sb)->j_start ;
-  SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_len = SB_JOURNAL(p_s_sb)->j_len ;  
-  atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_nonzerolen), SB_JOURNAL(p_s_sb)->j_len) ;
-  atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_left), SB_JOURNAL(p_s_sb)->j_len + 2);
-  SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_realblock = NULL ;
-  atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_flushing), 1) ;
-  atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_flushing), 1) ;
-
-  /* which is faster, locking/unlocking at the start and end of the for
-  ** or locking once per iteration around the insert_journal_hash?
-  ** eitherway, we are write locking insert_journal_hash.  The ENTIRE FOR
-  ** LOOP MUST not cause schedule to occur.
-  */
+  jl = SB_JOURNAL(p_s_sb)->j_current_jl;
+
+  /* save the transaction id in case we need to commit it later */
+  commit_trans_id = jl->j_trans_id;
+
+  atomic_set(&jl->j_older_commits_done, 0) ;
+  jl->j_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ;
+  jl->j_timestamp = SB_JOURNAL(p_s_sb)->j_trans_start_time ;
+  jl->j_commit_bh = c_bh ;
+  jl->j_start = SB_JOURNAL(p_s_sb)->j_start ;
+  jl->j_len = SB_JOURNAL(p_s_sb)->j_len ;  
+  atomic_set(&jl->j_nonzerolen, SB_JOURNAL(p_s_sb)->j_len) ;
+  atomic_set(&jl->j_commit_left, SB_JOURNAL(p_s_sb)->j_len + 2);
+  jl->j_realblock = NULL ;
 
-  /* for each real block, add it to the journal list hash,
+  /* The ENTIRE FOR LOOP MUST NOT cause schedule to occur.
+  ** For each real block, add it to the journal list hash,
   ** copy into real block index array in the commit or desc block
   */
   for (i = 0, cn = SB_JOURNAL(p_s_sb)->j_first ; cn ; cn = cn->next, i++) {
@@ -3089,7 +3668,7 @@ static int do_journal_end(struct reiserf
         reiserfs_panic(p_s_sb, "journal-1676, get_cnode returned NULL\n") ;
       }
       if (i == 0) {
-        SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_realblock = jl_cn ;
+        jl->j_realblock = jl_cn ;
       }
       jl_cn->prev = last_cn ;
       jl_cn->next = NULL ;
@@ -3107,7 +3686,7 @@ static int do_journal_end(struct reiserf
       jl_cn->state = 0 ;
       jl_cn->dev = cn->bh->b_dev ; 
       jl_cn->bh = cn->bh ;
-      jl_cn->jlist = SB_JOURNAL_LIST(p_s_sb) + SB_JOURNAL_LIST_INDEX(p_s_sb) ;
+      jl_cn->jlist = jl;
       insert_journal_hash(SB_JOURNAL(p_s_sb)->j_list_hash_table, jl_cn) ; 
       if (i < JOURNAL_TRANS_HALF) {
 	desc->j_realblock[i] = cpu_to_le32(cn->bh->b_blocknr) ;
@@ -3132,47 +3711,29 @@ static int do_journal_end(struct reiserf
 reiserfs_warning(p_s_sb, "journal-2020: do_journal_end: BAD desc->j_len is ZERO\n") ;
     atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ;
     wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ;
-    return 0 ;
+    goto out ;
   }
 
   /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */
   cur_write_start = SB_JOURNAL(p_s_sb)->j_start ;
-  cur_blocks_left = SB_JOURNAL(p_s_sb)->j_len  ;
   cn = SB_JOURNAL(p_s_sb)->j_first ;
   jindex = 1 ; /* start at one so we don't get the desc again */
-  while(cur_blocks_left > 0) {
+  while(cn) {
+    clear_bit(BH_JNew, &(cn->bh->b_state)) ;
     /* copy all the real blocks into log area.  dirty log blocks */
     if (test_bit(BH_JDirty, &cn->bh->b_state)) {
       struct buffer_head *tmp_bh ;
       tmp_bh =  journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 
 		       ((cur_write_start + jindex) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ;
       mark_buffer_uptodate(tmp_bh, 1) ;
-      memcpy(tmp_bh->b_data, cn->bh->b_data, cn->bh->b_size) ;  
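+      /* the buffer's page may live in highmem; kmap it for the copy */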
+      memcpy(tmp_bh->b_data, bh_kmap(cn->bh), cn->bh->b_size) ;
+      bh_kunmap(cn->bh);
       jindex++ ;
-    } else {
-      /* JDirty cleared sometime during transaction.  don't log this one */
-      reiserfs_warning(p_s_sb, "journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!\n") ;
-    }
-    cn = cn->next ;
-    cur_blocks_left-- ;
-  }
-
-  /* we are done  with both the c_bh and d_bh, but
-  ** c_bh must be written after all other commit blocks,
-  ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
-  */
-
-  /* now loop through and mark all buffers from this transaction as JDirty_wait
-  ** clear the JDirty bit, clear BH_JNew too.  
-  ** if they weren't JDirty, they weren't logged, just relse them and move on
-  */
-  cn = SB_JOURNAL(p_s_sb)->j_first ; 
-  while(cn) {
-    clear_bit(BH_JNew, &(cn->bh->b_state)) ;
-    if (test_bit(BH_JDirty, &(cn->bh->b_state))) {
       set_bit(BH_JDirty_wait, &(cn->bh->b_state)) ; 
       clear_bit(BH_JDirty, &(cn->bh->b_state)) ;
     } else {
+      /* JDirty cleared sometime during transaction.  don't log this one */
+      reiserfs_warning(p_s_sb, "journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!\n") ;
       brelse(cn->bh) ;
     }
     next = cn->next ;
@@ -3180,24 +3741,24 @@ reiserfs_warning(p_s_sb, "journal-2020: 
     cn = next ;
   }
 
-  /* unlock the journal list for committing and flushing */
-  atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_flushing), 0) ;
-  atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_flushing), 0) ;
-
-  orig_jindex = SB_JOURNAL_LIST_INDEX(p_s_sb) ;
-  jindex = (SB_JOURNAL_LIST_INDEX(p_s_sb) + 1) % JOURNAL_LIST_COUNT ; 
-  SB_JOURNAL_LIST_INDEX(p_s_sb) = jindex ;
+  /* we are done  with both the c_bh and d_bh, but
+  ** c_bh must be written after all other commit blocks,
+  ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
+  */
 
-  /* write any buffers that must hit disk before this commit is done */
-  fsync_buffers_list(&(SB_JOURNAL(p_s_sb)->j_dirty_buffers)) ;
+  SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb);
 
-  /* honor the flush wishes from the caller.  simple commits can
-   * be done outside the journal lock, they are done below
-   */
-  if (flush) {
-    flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ;
-    flush_journal_list(p_s_sb,  SB_JOURNAL_LIST(p_s_sb) + orig_jindex , 1) ;  
-  }
+  /* we lock the commit before putting it onto the main list because
+   * we want to make sure nobody tries to run flush_commit_list until
+   * the new transaction is fully set up, and we've already flushed the
+   * ordered bh list
+   */
+  down(&jl->j_commit_lock);
+
+  /* now it is safe to insert this transaction on the main list */
+  list_add_tail(&jl->j_list, &SB_JOURNAL(p_s_sb)->j_journal_list);
+  list_add_tail(&jl->j_working_list, &SB_JOURNAL(p_s_sb)->j_working_list);
+  SB_JOURNAL(p_s_sb)->j_num_work_lists++;
 
   /* reset journal values for the next transaction */
   old_start = SB_JOURNAL(p_s_sb)->j_start ;
@@ -3209,64 +3770,118 @@ reiserfs_warning(p_s_sb, "journal-2020: 
   SB_JOURNAL(p_s_sb)->j_len = 0 ;
   SB_JOURNAL(p_s_sb)->j_trans_start_time = 0 ;
   SB_JOURNAL(p_s_sb)->j_trans_id++ ;
+  SB_JOURNAL(p_s_sb)->j_current_jl->j_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id;
   SB_JOURNAL(p_s_sb)->j_must_wait = 0 ;
   SB_JOURNAL(p_s_sb)->j_len_alloc = 0 ;
   SB_JOURNAL(p_s_sb)->j_next_full_flush = 0 ;
   SB_JOURNAL(p_s_sb)->j_next_async_flush = 0 ;
   init_journal_hash(p_s_sb) ; 
 
+  /* tail conversion targets have to hit the disk before we end the 
+   * transaction.  Otherwise a later transaction might repack the tail
+   * before this transaction commits, leaving the data block unflushed and
+   * clean.  If we crash before the later transaction commits, the data
+   * block is lost.
+   */
+  while(!list_empty(&jl->j_tail_bh_list)) {
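+      /* fsync_buffers_list may sleep on I/O; drop the BKL while we wait */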
+      unlock_kernel();
+      fsync_buffers_list(&jl->j_tail_bh_list);
+      lock_kernel();
+  }
+  up(&jl->j_commit_lock);
+
+  /* honor the flush wishes from the caller.  simple commits can
+   * be done outside the journal lock, they are done below
+   */
+  if (flush) {
+    flush_commit_list(p_s_sb, jl, 1) ;
+    flush_journal_list(p_s_sb, jl, 1) ;  
+  }
+
   /* if the next transaction has any chance of wrapping, flush 
   ** transactions that might get overwritten.  If any journal lists are very 
   ** old flush them as well.  
   */
-  for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) {
-    jindex = i ;
-    if (SB_JOURNAL_LIST(p_s_sb)[jindex].j_len > 0 && SB_JOURNAL(p_s_sb)->j_start <= SB_JOURNAL_LIST(p_s_sb)[jindex].j_start) {
-      if ((SB_JOURNAL(p_s_sb)->j_start + SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) >= SB_JOURNAL_LIST(p_s_sb)[jindex].j_start) {
-	flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 1) ; 
-      }
-    } else if (SB_JOURNAL_LIST(p_s_sb)[jindex].j_len > 0 && 
-              (SB_JOURNAL(p_s_sb)->j_start + SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) > SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
-      if (((SB_JOURNAL(p_s_sb)->j_start + SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >= 
-            SB_JOURNAL_LIST(p_s_sb)[jindex].j_start) {
-	flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 1 ) ; 
+first_jl:
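+  /* a flush below can free list entries beyond temp_jl, so restart the
+   * scan from the head after every flush
+   */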
+  list_for_each_safe(entry, safe, &SB_JOURNAL(p_s_sb)->j_journal_list) {
+    temp_jl = JOURNAL_LIST_ENTRY(entry);
+    if (SB_JOURNAL(p_s_sb)->j_start <= temp_jl->j_start) {
+      if ((SB_JOURNAL(p_s_sb)->j_start + SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) >= 
+          temp_jl->j_start) 
+      {
+	flush_used_journal_lists(p_s_sb, temp_jl);
+	wakeup_kreiserfsd = 1;
+	goto first_jl;
+      } else if ((SB_JOURNAL(p_s_sb)->j_start + 
+                  SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) < 
+		  SB_ONDISK_JOURNAL_SIZE(p_s_sb)) 
+      {
+          /* if we don't cross into the next transaction and we don't
+	   * wrap, there is no way we can overlap any later transactions;
+	   * break now
+	   */
+	  break;
       }
+    } else if ((SB_JOURNAL(p_s_sb)->j_start + 
+                SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) > 
+		SB_ONDISK_JOURNAL_SIZE(p_s_sb)) 
+    {
+      if (((SB_JOURNAL(p_s_sb)->j_start + SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) % 
+            SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >= temp_jl->j_start) 
+      {
+	flush_used_journal_lists(p_s_sb, temp_jl);
+	wakeup_kreiserfsd = 1;
+	goto first_jl;
+      } else {
+	  /* we don't overlap anything from our start to the end of the
+	   * log, and our wrapped portion doesn't overlap anything at
+	   * the start of the log.  We can break
+	   */
+	  break;
     } 
-    /* this check should always be run, to send old lists to disk */
-    if (SB_JOURNAL_LIST(p_s_sb)[jindex].j_len > 0 && 
-              SB_JOURNAL_LIST(p_s_sb)[jindex].j_timestamp < 
-	      (CURRENT_TIME - (SB_JOURNAL_MAX_TRANS_AGE(p_s_sb) * 4))) {
-	flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 1 ) ; 
     }
   }
+  flush_old_journal_lists(p_s_sb);
 
-  /* if the next journal_list is still in use, flush it */
-  if (SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_len != 0) {
-    flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + SB_JOURNAL_LIST_INDEX(p_s_sb), 1) ; 
+  /* soft limit: if lists are piling up, or we flushed above, wake kreiserfsd */
+  if (SB_JOURNAL(p_s_sb)->j_num_work_lists > 128 || wakeup_kreiserfsd) {
+      wake_up(&reiserfs_commit_thread_wait) ;
   }
 
-  /* we don't want anyone flushing the new transaction's list */
-  atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_flushing), 1) ;
-  atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_flushing), 1) ;
-  SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_list_bitmap = get_list_bitmap(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + 
-											 SB_JOURNAL_LIST_INDEX(p_s_sb)) ;
+  SB_JOURNAL(p_s_sb)->j_current_jl->j_list_bitmap = get_list_bitmap(p_s_sb, SB_JOURNAL(p_s_sb)->j_current_jl) ;
 
-  if (!(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_list_bitmap)) {
+  if (!(SB_JOURNAL(p_s_sb)->j_current_jl->j_list_bitmap)) {
     reiserfs_panic(p_s_sb, "journal-1996: do_journal_end, could not get a list bitmap\n") ;
   }
-  unlock_journal(p_s_sb) ;
+
   atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ;
+  unlock_journal(p_s_sb) ;
   /* wake up any body waiting to join. */
+  clear_bit(WRITERS_QUEUED, &SB_JOURNAL(p_s_sb)->j_state);
   wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ;
   
-  if (!flush) {
-    if (current->need_resched)
+  if (!flush && wait_on_commit) {
+      if (current->need_resched) {
       schedule() ;
-    if (wait_on_commit) {
-      flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ;
-    } else {
-      commit_flush_async(p_s_sb, orig_jindex) ; 
     }
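+      /* the commit may already be done and the list freed; only flush
+       * it if our transaction is still on the journal list
+       */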
+      if (journal_list_still_alive(p_s_sb, commit_trans_id))
+	  flush_commit_list(p_s_sb, jl, 1) ;
+  }
+  /* if we did an async commit, get kreiserfsd going on it */
+  if (!wait_on_commit) {
+      wake_up(&reiserfs_commit_thread_wait) ;
+      schedule();
   }
+out:
+  reiserfs_check_lock_depth("journal end2");
+  if (reiserfs_persistent_handle(th)) {
+      memset(th, 0, sizeof(*th));
+      reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle), p_s_sb) ;
+  } else
+      th->t_flags = 0 ;
   return 0 ;
 }
+
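+/* nothing to set up yet; gives init_reiserfs_fs a single hook for journal
+** cache setup */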
+int __init reiserfs_journal_cache_init(void) {
+    return 0;
+}
diff -rupBb linux-2.4.24.orig/fs/reiserfs/namei.c linux-2.4.24.new/fs/reiserfs/namei.c
--- linux-2.4.24.orig/fs/reiserfs/namei.c	Tue Mar 16 13:43:29 2004
+++ linux-2.4.24.new/fs/reiserfs/namei.c	Tue Mar 16 13:43:45 2004
@@ -536,7 +536,6 @@ static int reiserfs_create (struct inode
 	return retval ;
 
     journal_begin(&th, dir->i_sb, jbegin_count) ;
-    th.t_caller = "create" ;
     retval = reiserfs_new_inode (&th, dir, mode, 0, 0/*i_size*/, dentry, inode);
     if (retval) {
 	goto out_failed ;
diff -rupBb linux-2.4.24.orig/fs/reiserfs/procfs.c linux-2.4.24.new/fs/reiserfs/procfs.c
--- linux-2.4.24.orig/fs/reiserfs/procfs.c	Tue Mar 16 13:43:29 2004
+++ linux-2.4.24.new/fs/reiserfs/procfs.c	Tue Mar 16 13:17:18 2004
@@ -497,7 +497,6 @@ int reiserfs_journal_in_proc( char *buff
 			"j_first_unflushed_offset: \t%lu\n"
 			"j_last_flush_trans_id: \t%lu\n"
 			"j_trans_start_time: \t%li\n"
-			"j_journal_list_index: \t%i\n"
 			"j_list_bitmap_index: \t%i\n"
 			"j_must_wait: \t%i\n"
 			"j_next_full_flush: \t%i\n"
@@ -543,7 +542,6 @@ int reiserfs_journal_in_proc( char *buff
 			JF( j_first_unflushed_offset ),
 			JF( j_last_flush_trans_id ),
 			JF( j_trans_start_time ),
-			JF( j_journal_list_index ),
 			JF( j_list_bitmap_index ),
 			JF( j_must_wait ),
 			JF( j_next_full_flush ),
diff -rupBb linux-2.4.24.orig/fs/reiserfs/stree.c linux-2.4.24.new/fs/reiserfs/stree.c
--- linux-2.4.24.orig/fs/reiserfs/stree.c	Tue Mar 16 13:43:29 2004
+++ linux-2.4.24.new/fs/reiserfs/stree.c	Tue Mar 16 13:43:45 2004
@@ -652,8 +652,8 @@ int search_by_key (struct super_block * 
                                        stop at leaf level - set to
                                        DISK_LEAF_NODE_LEVEL */
     ) {
-    int  n_block_number = SB_ROOT_BLOCK (p_s_sb),
-      expected_level = SB_TREE_HEIGHT (p_s_sb),
+    int  n_block_number, 
+         expected_level,
       n_block_size    = p_s_sb->s_blocksize;
     struct buffer_head  *       p_s_bh;
     struct path_element *       p_s_last_element;
@@ -678,8 +678,11 @@ int search_by_key (struct super_block * 
     /* With each iteration of this loop we search through the items in the
        current node, and calculate the next current node(next path element)
        for the next iteration of this loop.. */
+    n_block_number = SB_ROOT_BLOCK (p_s_sb);
+    expected_level = SB_TREE_HEIGHT (p_s_sb);
     while ( 1 ) {
 
+        reiserfs_check_lock_depth("search_by_key");
 #ifdef CONFIG_REISERFS_CHECK
 	if ( !(++n_repeat_counter % 50000) )
 	    reiserfs_warning (p_s_sb, "PAP-5100: search_by_key: %s:"
@@ -1132,9 +1135,7 @@ static char  prepare_for_delete_or_cut(
 		if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
 		    int orig_len_alloc = th->t_blocks_allocated ;
 		    pathrelse(p_s_path) ;
-
-		    journal_end(th, p_s_sb, orig_len_alloc) ;
-		    journal_begin(th, p_s_sb, orig_len_alloc) ;
+		    reiserfs_restart_transaction(th, orig_len_alloc);
 		    reiserfs_update_inode_transaction(inode) ;
 		    need_research = 1;
 		    break;
@@ -1484,6 +1485,38 @@ static void indirect_to_direct_roll_back
     mark_inode_dirty (inode);
 }
 
+static void
+unmap_buffers(struct page *page, loff_t pos) {
+    struct buffer_head *bh ;
+    struct buffer_head *head ;
+    struct buffer_head *next ;
+    unsigned long tail_index ;
+    unsigned long cur_index ;
+
+    if (!page || !page->buffers)
+        return;
+    
+    tail_index = pos & (PAGE_CACHE_SIZE - 1) ;
+    cur_index = 0 ;
+    head = page->buffers ;
+    bh = head ;
+    do {
+        next = bh->b_this_page ;
+
+        /* we want to unmap the buffers that contain the tail, and
+        ** all the buffers after it (since the tail must be at the
+        ** end of the file).  We don't want to unmap file data
+        ** before the tail, since it might be dirty and waiting to
+        ** reach disk
+        */
+        cur_index += bh->b_size ;
+        if (cur_index > tail_index) {
+            reiserfs_unmap_buffer(bh) ;
+        }
+	bh = next ;
+    } while (bh != head) ;
+}
+
 
 /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
 int reiserfs_cut_from_item (struct reiserfs_transaction_handle *th, 
@@ -1499,6 +1532,7 @@ int reiserfs_cut_from_item (struct reise
        structure by using the init_tb_struct and fix_nodes functions.
        After that we can make tree balancing. */
     struct tree_balance s_cut_balance;
+    loff_t tail_pos = 0;
     int n_cut_size = 0,        /* Amount to be cut. */
 	n_ret_value = CARRY_ON,
 	n_removed = 0,     /* Number of the removed unformatted nodes. */
@@ -1531,6 +1565,9 @@ int reiserfs_cut_from_item (struct reise
 		/* tail has been left in the unformatted node */
 		return n_ret_value;
 
+	    if (n_is_inode_locked) {
+printk("inode locked twice\n");
+	    }
 	    n_is_inode_locked = 1;
 	  
 	    /* removing of last unformatted node will change value we
@@ -1545,6 +1582,7 @@ int reiserfs_cut_from_item (struct reise
       	    set_cpu_key_k_type (p_s_item_key, TYPE_INDIRECT);
 	    p_s_item_key->key_length = 4;
 	    n_new_file_size -= (n_new_file_size & (p_s_sb->s_blocksize - 1));
+	    tail_pos = n_new_file_size;
 	    set_cpu_key_k_offset (p_s_item_key, n_new_file_size + 1);
 	    if ( search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_NOT_FOUND ){
 		print_block (PATH_PLAST_BUFFER (p_s_path), 3, PATH_LAST_POSITION (p_s_path) - 1, PATH_LAST_POSITION (p_s_path) + 1);
@@ -1642,6 +1680,7 @@ int reiserfs_cut_from_item (struct reise
 	** deal with it here.
 	*/
 	p_s_inode->u.reiserfs_i.i_flags &= ~i_pack_on_close_mask;
+	unmap_buffers(page, tail_pos);
     }
     return n_ret_value;
 }
@@ -1681,6 +1720,7 @@ void reiserfs_do_truncate (struct reiser
 	n_new_file_size;/* New file size. */
     int                   n_deleted;      /* Number of deleted or truncated bytes. */
     int retval;
+    int jbegin_count = th->t_blocks_allocated;
 
     if ( ! (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode) || S_ISLNK(p_s_inode->i_mode)) )
 	return;
@@ -1760,17 +1800,14 @@ void reiserfs_do_truncate (struct reiser
 	** sure the file is consistent before ending the current trans
 	** and starting a new one
 	*/
-        if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
-	  int orig_len_alloc = th->t_blocks_allocated ;
+        if (journal_transaction_should_end(th, jbegin_count)) {
 	  decrement_counters_in_path(&s_search_path) ;
 
 	  if (update_timestamps) {
 	      p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME;
 	  } 
 	  reiserfs_update_sd(th, p_s_inode) ;
-
-	  journal_end(th, p_s_inode->i_sb, orig_len_alloc) ;
-	  journal_begin(th, p_s_inode->i_sb, orig_len_alloc) ;
+	  reiserfs_restart_transaction(th, jbegin_count) ;
 	  reiserfs_update_inode_transaction(p_s_inode) ;
 	}
     } while ( n_file_size > ROUND_UP (n_new_file_size) &&
diff -rupBb linux-2.4.24.orig/fs/reiserfs/super.c linux-2.4.24.new/fs/reiserfs/super.c
--- linux-2.4.24.orig/fs/reiserfs/super.c	Tue Mar 16 13:43:29 2004
+++ linux-2.4.24.new/fs/reiserfs/super.c	Tue Mar 16 13:43:45 2004
@@ -13,6 +13,9 @@
 #include <linux/locks.h>
 #include <linux/init.h>
 
+EXPORT_SYMBOL(journal_begin) ;
+EXPORT_SYMBOL(journal_end) ;
+
 #define REISERFS_OLD_BLOCKSIZE 4096
 #define REISERFS_SUPER_MAGIC_STRING_OFFSET_NJ 20
 
@@ -471,6 +474,14 @@ static const arg_desc_t hash[] = {
     {NULL, 0, 0}
 };
 
+/* possible values for -o data= */
+static const arg_desc_t logging_mode[] = {
+    {"ordered", 1<<REISERFS_DATA_ORDERED, (1<<REISERFS_DATA_LOG|1<<REISERFS_DATA_WRITEBACK)},
+    {"journal", 1<<REISERFS_DATA_LOG, (1<<REISERFS_DATA_ORDERED|1<<REISERFS_DATA_WRITEBACK)},
+    {"writeback", 1<<REISERFS_DATA_WRITEBACK, (1<<REISERFS_DATA_ORDERED|1<<REISERFS_DATA_LOG)},
+    {NULL, 0, 0}
+};
+
 
 /* possible values for "-o block-allocator=" and bits which are to be set in
    s_mount_opt of reiserfs specific part of in-core super block */
@@ -620,6 +631,7 @@ static int reiserfs_parse_options (struc
 		
 		{"block-allocator", 'a', balloc, 0, 0},
 		{"hash", 'h', hash, 1<<FORCE_HASH_DETECT, 0},
+		{"data", 'd', logging_mode, 0, 0},
 		
 		{"resize", 'r', 0, 0, 0},
 		{"attrs", 0, 0, 1<<REISERFS_ATTRS, 0},
@@ -680,6 +692,47 @@ static void handle_attrs( struct super_b
 	}
 }
 
+static void switch_data_mode(struct super_block *s, unsigned long mode) {
+    struct reiserfs_transaction_handle th;
+    int sync_all = !reiserfs_data_log(s);
+
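+    /* commit everything pending under the old mode before we switch */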
+    journal_begin(&th, s, 1);
+    SB_JOURNAL(s)->j_must_wait = 1;
+    journal_end_sync(&th, s, 1);
+
+    s->u.reiserfs_sb.s_mount_opt &= ~((1 << REISERFS_DATA_LOG) |
+                                       (1 << REISERFS_DATA_ORDERED) |
+				       (1 << REISERFS_DATA_WRITEBACK));
+    s->u.reiserfs_sb.s_mount_opt |= (1 << mode);
+
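+    /* and force another sync commit so the new mode starts on a clean
+     * transaction boundary
+     */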
+    journal_begin(&th, s, 1);
+    SB_JOURNAL(s)->j_must_wait = 1;
+    journal_end_sync(&th, s, 1);
+
+    if (sync_all)
+        fsync_no_super(s->s_dev);
+}
+
+static void handle_data_mode(struct super_block *s, unsigned long mount_options)
+{
+    if (mount_options & (1 << REISERFS_DATA_LOG)) {
+        if (!reiserfs_data_log(s)) {
+	    switch_data_mode(s, REISERFS_DATA_LOG);
+	    printk("reiserfs: switching to journaled data mode\n");
+	}
+    } else if (mount_options & (1 << REISERFS_DATA_ORDERED)) {
+        if (!reiserfs_data_ordered(s)) {
+	    switch_data_mode(s, REISERFS_DATA_ORDERED);
+	    printk("reiserfs: switching to ordered data mode\n");
+	}
+    } else if (mount_options & (1 << REISERFS_DATA_WRITEBACK)) {
+        if (!reiserfs_data_writeback(s)) {
+	    switch_data_mode(s, REISERFS_DATA_WRITEBACK);
+	    printk("reiserfs: switching to writeback data mode\n");
+	}
+    }
+}
+
 static int reiserfs_remount (struct super_block * s, int * mount_flags, char * data)
 {
   struct reiserfs_super_block * rs;
@@ -731,9 +784,10 @@ static int reiserfs_remount (struct supe
     s->s_dirt = 0;
   } else {
     /* remount read-write */
-    if (!(s->s_flags & MS_RDONLY))
+    if (!(s->s_flags & MS_RDONLY)) {
+	handle_data_mode(s, mount_options);
 	return 0; /* We are read-write already */
-
+    }
     s->s_flags &= ~MS_RDONLY ; /* now it is safe to call journal_begin */
     journal_begin(&th, s, 10) ;
     
@@ -751,9 +805,10 @@ static int reiserfs_remount (struct supe
   SB_JOURNAL(s)->j_must_wait = 1 ;
   journal_end(&th, s, 10) ;
 
-  if (!( *mount_flags & MS_RDONLY ) )
+  if (!( *mount_flags & MS_RDONLY ) ) {
     finish_unfinished( s );
-
+    handle_data_mode(s, mount_options);
+  }
   return 0;
 }
 
@@ -1227,9 +1282,22 @@ static struct super_block * reiserfs_rea
     printk("reiserfs:warning: - it is slow mode for debugging.\n");
 #endif
 
-    /* fixme */
-    jdev_name = NULL;
+    /* make data=ordered the default */
+    if (!reiserfs_data_log(s) && !reiserfs_data_ordered(s) &&
+        !reiserfs_data_writeback(s))
+    {
+        s->u.reiserfs_sb.s_mount_opt |= (1 << REISERFS_DATA_ORDERED);
+    }
 
+    if (reiserfs_data_log(s)) {
+        printk("reiserfs: using journaled data mode\n");
+    } else if (reiserfs_data_ordered(s)) {
+        printk("reiserfs: using ordered data mode\n");
+    } else {
+        printk("reiserfs: using writeback data mode\n");
+    }
+
+    jdev_name = NULL;
     if( journal_init(s, jdev_name, old_format) ) {
 	reiserfs_warning(s, "sh-2022: reiserfs_read_super: unable to initialize journal space\n") ;
 	goto error ;
@@ -1369,16 +1437,19 @@ static DECLARE_FSTYPE_DEV(reiserfs_fs_ty
 
 static int __init init_reiserfs_fs (void)
 {
+	int ret;
 	reiserfs_proc_info_global_init();
 	reiserfs_proc_register_global( "version", 
 				       reiserfs_global_version_in_proc );
+	ret = reiserfs_journal_cache_init();
+	if (ret)
+	    return ret;
         return register_filesystem(&reiserfs_fs_type);
 }
 
 MODULE_DESCRIPTION("ReiserFS journaled filesystem");
 MODULE_AUTHOR("Hans Reiser <reiser@namesys.com>");
 MODULE_LICENSE("GPL");
-EXPORT_NO_SYMBOLS;
 
 static void __exit exit_reiserfs_fs(void)
 {
diff -rupBb linux-2.4.24.orig/fs/reiserfs/tail_conversion.c linux-2.4.24.new/fs/reiserfs/tail_conversion.c
--- linux-2.4.24.orig/fs/reiserfs/tail_conversion.c	Tue Mar 16 13:43:29 2004
+++ linux-2.4.24.new/fs/reiserfs/tail_conversion.c	Tue Mar 16 13:43:45 2004
@@ -152,39 +152,6 @@ void reiserfs_unmap_buffer(struct buffer
   }
 }
 
-static void
-unmap_buffers(struct page *page, loff_t pos) {
-  struct buffer_head *bh ;
-  struct buffer_head *head ;
-  struct buffer_head *next ;
-  unsigned long tail_index ;
-  unsigned long cur_index ;
-
-  if (page) {
-    if (page->buffers) {
-      tail_index = pos & (PAGE_CACHE_SIZE - 1) ;
-      cur_index = 0 ;
-      head = page->buffers ;
-      bh = head ;
-      do {
-	next = bh->b_this_page ;
-
-        /* we want to unmap the buffers that contain the tail, and
-        ** all the buffers after it (since the tail must be at the
-        ** end of the file).  We don't want to unmap file data 
-        ** before the tail, since it might be dirty and waiting to 
-        ** reach disk
-        */
-        cur_index += bh->b_size ;
-        if (cur_index > tail_index) {
-          reiserfs_unmap_buffer(bh) ;
-        }
-	bh = next ;
-      } while (bh != head) ;
-    }
-  } 
-}
-
 /* this first locks inode (neither reads nor sync are permitted),
    reads tail through page cache, insert direct item. When direct item
    inserted successfully inode is left locked. Return value is always
@@ -274,11 +241,6 @@ int indirect2direct (struct reiserfs_tra
     }
     kunmap(page) ;
 
-    /* this will invalidate all the buffers in the page after
-    ** pos1
-    */
-    unmap_buffers(page, pos1) ;
-
     // note: we have now the same as in above direct2indirect
     // conversion: there are two keys which have matching first three
     // key components. They only differ by the fourth one.
diff -rupBb linux-2.4.24.orig/include/linux/fs.h linux-2.4.24.new/include/linux/fs.h
--- linux-2.4.24.orig/include/linux/fs.h	Tue Mar 16 13:43:29 2004
+++ linux-2.4.24.new/include/linux/fs.h	Tue Mar 16 13:17:18 2004
@@ -1210,6 +1210,8 @@ static inline int atomic_set_buffer_dirt
 	return test_and_set_bit(BH_Dirty, &bh->b_state);
 }
 
+extern void buffer_insert_list_journal_head(struct buffer_head *bh, struct list_head *list, void *journal_head);
+
 static inline void mark_buffer_async(struct buffer_head * bh, int on)
 {
 	if (on)
@@ -1459,6 +1461,7 @@ typedef int (get_block_t)(struct inode*,
 /* Generic buffer handling for block filesystems.. */
 extern int try_to_release_page(struct page * page, int gfp_mask);
 extern int discard_bh_page(struct page *, unsigned long, int);
+extern void discard_buffer(struct buffer_head *bh) ;
 #define block_flushpage(page, offset) discard_bh_page(page, offset, 1)
 #define block_invalidate_page(page) discard_bh_page(page, 0, 0)
 extern int block_symlink(struct inode *, const char *, int);
diff -rupBb linux-2.4.24.orig/include/linux/reiserfs_fs.h linux-2.4.24.new/include/linux/reiserfs_fs.h
--- linux-2.4.24.orig/include/linux/reiserfs_fs.h	Tue Mar 16 13:43:29 2004
+++ linux-2.4.24.new/include/linux/reiserfs_fs.h	Tue Mar 16 13:43:45 2004
@@ -1329,8 +1329,7 @@ static inline loff_t max_reiserfs_offset
 #define fs_generation(s) ((s)->u.reiserfs_sb.s_generation_counter)
 #define get_generation(s) atomic_read (&fs_generation(s))
 #define FILESYSTEM_CHANGED_TB(tb)  (get_generation((tb)->tb_sb) != (tb)->fs_gen)
-#define fs_changed(gen,s) (gen != get_generation (s))
-
+#define fs_changed(gen,s) (gen != get_generation(s))
 
 /***************************************************************************/
 /*                  FIXATE NODES                                           */
@@ -1653,6 +1652,86 @@ struct reiserfs_journal_header {
   /* 12 */ struct journal_params jh_journal;
 } ;
 
+static inline int
+reiserfs_file_data_log(struct inode *inode) {
+    if (reiserfs_data_log(inode->i_sb) || 
+       (inode->u.reiserfs_i.i_flags & i_data_log))
+    {
+        return 1 ;
+    }
+    return 0 ;
+}
+
+/* flags for the nested transaction handle */
+#define REISERFS_PERSISTENT_HANDLE 1
+#define REISERFS_ACTIVE_HANDLE 2
+#define REISERFS_CLOSE_NESTED 4
+#define REISERFS_DANGLING_HANDLE 8 
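+
+/* a persistent handle is allocated by reiserfs_persistent_transaction() and
+** freed when journal_end finishes with it
+*/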
+/*
+** transaction handle which is passed around for all journal calls
+*/
+struct reiserfs_transaction_handle {
+  struct super_block *t_super ; /* super for this FS when journal_begin was 
+				   called. saves calls to reiserfs_get_super 
+				   also used by nested transactions to make
+				   sure they are nesting on the right FS
+				   _must_ be first in the handle 
+				*/
+  int t_refcount;
+  int t_blocks_logged ;         /* number of blocks this writer has logged */
+  int t_blocks_allocated ;      /* number of blocks this writer allocated */
+  unsigned long t_trans_id ;    /* sanity check, equals the current trans id */
+  int t_flags ;
+  void *t_handle_save ;		/* save existing current->journal_info */
+  int displace_new_blocks:1;    /* if new block allocation occurs, that
+  				   block should be displaced from others */
+} ;
+
+static inline int
+reiserfs_dangling_handle(struct reiserfs_transaction_handle *th) {
+    return (th && (th->t_flags & REISERFS_DANGLING_HANDLE)) ;
+}
+
+static inline void
+reiserfs_set_handle_dangling(struct reiserfs_transaction_handle *th) {
+    th->t_flags |= REISERFS_DANGLING_HANDLE ;
+}
+
+static inline void
+reiserfs_clear_handle_dangling(struct reiserfs_transaction_handle *th) {
+    th->t_flags &= ~REISERFS_DANGLING_HANDLE ;
+}
+
+static inline int 
+reiserfs_persistent_handle(struct reiserfs_transaction_handle *th) {
+    return (th && (th->t_flags & REISERFS_PERSISTENT_HANDLE)) ;
+}
+
+static inline void 
+reiserfs_set_handle_persistent(struct reiserfs_transaction_handle *th) {
+    th->t_flags |= REISERFS_PERSISTENT_HANDLE ;
+}
+
+static inline int
+reiserfs_active_handle(struct reiserfs_transaction_handle *th) {
+    return (th && (th->t_flags & REISERFS_ACTIVE_HANDLE)) ;
+}
+
+static inline void
+reiserfs_set_handle_active(struct reiserfs_transaction_handle *th) {
+    th->t_flags |= REISERFS_ACTIVE_HANDLE ;
+}
+
+static inline int
+reiserfs_restartable_handle(struct reiserfs_transaction_handle *th) {
+    return (th && (th->t_flags & REISERFS_CLOSE_NESTED)) ;
+}
+
+static inline void
+reiserfs_set_handle_restartable(struct reiserfs_transaction_handle *th) {
+    th->t_flags |= REISERFS_CLOSE_NESTED ;
+}
+
 extern task_queue reiserfs_commit_thread_tq ;
 extern wait_queue_head_t reiserfs_commit_thread_wait ;
 
@@ -1693,6 +1772,7 @@ extern wait_queue_head_t reiserfs_commit
 */
 #define JOURNAL_BUFFER(j,n) ((j)->j_ap_blocks[((j)->j_start + (n)) % JOURNAL_BLOCK_COUNT])
 
+int reiserfs_journal_cache_init(void);
 int reiserfs_flush_old_commits(struct super_block *);
 void reiserfs_commit_for_inode(struct inode *) ;
 void reiserfs_commit_for_tail(struct inode *) ;
@@ -1702,6 +1782,18 @@ void reiserfs_wait_on_write_block(struct
 void reiserfs_block_writes(struct reiserfs_transaction_handle *th) ;
 void reiserfs_allow_writes(struct super_block *s) ;
 void reiserfs_check_lock_depth(char *caller) ;
+int journal_mark_dirty(struct reiserfs_transaction_handle *, 
+                       struct super_block *, struct buffer_head *bh) ;
+
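+/* non-zero if the current task has a transaction open against this super block */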
+static inline int reiserfs_transaction_running(struct super_block *s) {
+    struct reiserfs_transaction_handle *th = current->journal_info ;
+    if (th && th->t_super == s)
+        return 1 ;
+    if (th && th->t_super == NULL)
+        BUG();
+    return 0 ;
+}
+
 void reiserfs_prepare_for_journal(struct super_block *, struct buffer_head *bh, int wait) ;
 void reiserfs_restore_prepared_buffer(struct super_block *, struct buffer_head *bh) ;
 struct buffer_head  * journal_bread (struct super_block *s, int block);
@@ -1717,8 +1809,14 @@ int journal_mark_freed(struct reiserfs_t
 int push_journal_writer(char *w) ;
 int pop_journal_writer(int windex) ;
 int journal_transaction_should_end(struct reiserfs_transaction_handle *, int) ;
+int reiserfs_restart_transaction(struct reiserfs_transaction_handle *, int) ;
 int reiserfs_in_journal(struct super_block *p_s_sb, kdev_t dev, int bmap_nr, int bit_nr, int size, int searchall, unsigned int *next) ;
 int journal_begin(struct reiserfs_transaction_handle *, struct super_block *p_s_sb, unsigned long) ;
+
+/* allocates a transaction handle and starts a new transaction with it */
+struct reiserfs_transaction_handle *
+reiserfs_persistent_transaction(struct super_block *p_s_sb, unsigned long) ;
+
 struct super_block *reiserfs_get_super(kdev_t dev) ;
 void flush_async_commits(struct super_block *p_s_sb) ;
 
@@ -1896,8 +1994,18 @@ int reiserfs_new_inode (struct reiserfs_
                                int i_size,
                                struct dentry *dentry,
                                struct inode *inode);
-int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode);
-void reiserfs_update_sd (struct reiserfs_transaction_handle *th, struct inode * inode);
+
+int reiserfs_sync_inode (struct reiserfs_transaction_handle *th,
+                         struct inode * inode);
+
+void reiserfs_update_sd_size (struct reiserfs_transaction_handle *th,
+                              struct inode * inode, loff_t size);
+
+static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th,
+                                      struct inode *inode)
+{
+    reiserfs_update_sd_size(th, inode, inode->i_size) ;
+}
 
 void sd_attrs_to_i_attrs( __u16 sd_attrs, struct inode *inode );
 void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs );
diff -rupBb linux-2.4.24.orig/include/linux/reiserfs_fs_i.h linux-2.4.24.new/include/linux/reiserfs_fs_i.h
--- linux-2.4.24.orig/include/linux/reiserfs_fs_i.h	Tue Mar 16 13:43:29 2004
+++ linux-2.4.24.new/include/linux/reiserfs_fs_i.h	Tue Mar 16 13:17:18 2004
@@ -6,6 +6,8 @@
 
 #include <linux/list.h>
 
+struct reiserfs_journal_list;
+
 /** bitmasks for i_flags field in reiserfs-specific part of inode */
 typedef enum {
     /** this says what format of key do all items (but stat data) of
@@ -23,7 +25,9 @@ typedef enum {
 	truncate or unlink. Safe link is used to avoid leakage of disk
 	space on crash with some files open, but unlinked. */
     i_link_saved_unlink_mask   =  0x0010,
-    i_link_saved_truncate_mask =  0x0020
+    i_link_saved_truncate_mask =  0x0020,
+    /** are we logging data blocks for this file? */
+    i_data_log                 =  0x0040,
 } reiserfs_inode_flags;
 
 
@@ -52,14 +56,14 @@ struct reiserfs_inode_info {
     ** needs to be committed in order for this inode to be properly
     ** flushed */
     unsigned long i_trans_id ;
-    unsigned long i_trans_index ;
+    struct reiserfs_journal_list *i_jl;
 
     /* direct io needs to make sure the tail is on disk to avoid
      * buffer alias problems.  This records the transaction last
      * involved in a direct->indirect conversion for this file
      */
     unsigned long i_tail_trans_id;
-    unsigned long i_tail_trans_index;
+    struct reiserfs_journal_list *i_tail_jl;
 };
 
 #endif
diff -rupBb linux-2.4.24.orig/include/linux/reiserfs_fs_sb.h linux-2.4.24.new/include/linux/reiserfs_fs_sb.h
--- linux-2.4.24.orig/include/linux/reiserfs_fs_sb.h	Tue Mar 16 13:43:29 2004
+++ linux-2.4.24.new/include/linux/reiserfs_fs_sb.h	Tue Mar 16 13:17:18 2004
@@ -120,7 +120,6 @@ typedef enum {
 #define JOURNAL_MAX_CNODE   1500 /* max cnodes to allocate. */
 #define JOURNAL_HASH_SIZE 8192   
 #define JOURNAL_NUM_BITMAPS 5 /* number of copies of the bitmaps to have floating.  Must be >= 2 */
-#define JOURNAL_LIST_COUNT 64
 
 /* these are bh_state bit flag offset numbers, for use in the buffer head */
 
@@ -167,20 +166,27 @@ struct reiserfs_list_bitmap {
   struct reiserfs_bitmap_node **bitmaps ;
 } ;
 
-/*
-** transaction handle which is passed around for all journal calls
-*/
-struct reiserfs_transaction_handle {
-				/* ifdef it. -Hans */
-  char *t_caller ;              /* debugging use */
-  int t_blocks_logged ;         /* number of blocks this writer has logged */
-  int t_blocks_allocated ;      /* number of blocks this writer allocated */
-  unsigned long t_trans_id ;    /* sanity check, equals the current trans id */
-  struct super_block *t_super ; /* super for this FS when journal_begin was 
-                                   called. saves calls to reiserfs_get_super */
-  int displace_new_blocks:1;	/* if new block allocation occurres, that block
-				   should be displaced from others */
-} ;
+struct reiserfs_journal_list;
+
+/* so, we're using fsync_buffers_list to do the ordered buffer writes,
+ * but we don't want to have a full inode on each buffer list; that is
+ * a big waste of space.
+ *
+ * instead we copy the very head of the inode into a list here, a kludge
+ * but much smaller.
+ */
+struct reiserfs_inode_list {
+    struct list_head        i_hash;
+    struct list_head        i_list;
+    struct list_head        i_dentry;
+    struct list_head        i_dirty_buffers;
+
+    /* we could be very smart and do math based on the location
+     * of the inode list in the journal list struct.
+     * let's do that after this works properly
+     */
+    struct reiserfs_journal_list *jl;
+};
 
 /*
 ** one of these for each transaction.  The most important part here is the j_realblock.
@@ -190,20 +196,32 @@ struct reiserfs_transaction_handle {
 ** to be overwritten */
 struct reiserfs_journal_list {
   unsigned long j_start ;
+  unsigned long j_state ;
   unsigned long j_len ;
   atomic_t j_nonzerolen ;
   atomic_t j_commit_left ;
-  atomic_t j_flushing ;
-  atomic_t j_commit_flushing ;
   atomic_t j_older_commits_done ;      /* all commits older than this on disk*/
+  struct semaphore j_commit_lock ;
   unsigned long j_trans_id ;
   time_t j_timestamp ;
   struct reiserfs_list_bitmap *j_list_bitmap ;
   struct buffer_head *j_commit_bh ; /* commit buffer head */
   struct reiserfs_journal_cnode *j_realblock  ;
   struct reiserfs_journal_cnode *j_freedlist ; /* list of buffers that were freed during this trans.  free each of these on flush */
-  wait_queue_head_t j_commit_wait ; /* wait for all the commit blocks to be flushed */
-  wait_queue_head_t j_flush_wait ; /* wait for all the real blocks to be flushed */
+
+  /* time ordered list of all the active transactions */
+  struct list_head j_list;
+
+  /* time ordered list of all transactions not touched by kreiserfsd */
+  struct list_head j_working_list;
+
+  /* for data=ordered support */
+  struct list_head j_ordered_bh_list;
+
+  /* sigh, the tails have slightly different rules for flushing; they
+   * need their own list
+   */
+  struct list_head j_tail_bh_list;
 } ;
 
 struct reiserfs_page_list  ; /* defined in reiserfs_fs.h */
@@ -230,15 +248,11 @@ struct reiserfs_journal {
   unsigned long j_last_flush_trans_id ;    /* last fully flushed journal timestamp */
   struct buffer_head *j_header_bh ;   
 
-  /* j_flush_pages must be flushed before the current transaction can
-  ** commit
-  */
-  struct reiserfs_page_list *j_flush_pages ;
   time_t j_trans_start_time ;         /* time this transaction started */
   struct semaphore j_lock ;
+  struct semaphore j_flush_sem ;
   wait_queue_head_t j_join_wait ;    /* wait for current transaction to finish before starting new one */
   atomic_t j_jlock ;                       /* lock for j_join_wait */
-  int j_journal_list_index ;	      /* journal list number of the current trans */
   int j_list_bitmap_index ;	      /* number of next list bitmap to use */
   int j_must_wait ;		       /* no more journal begins allowed. MUST sleep on j_join_wait */
   int j_next_full_flush ;             /* next journal_end will flush all journal list */
@@ -254,13 +268,28 @@ struct reiserfs_journal {
 
   struct reiserfs_journal_cnode *j_cnode_free_list ;
   struct reiserfs_journal_cnode *j_cnode_free_orig ; /* orig pointer returned from vmalloc */
+  struct reiserfs_journal_list *j_current_jl;
 
   int j_free_bitmap_nodes ;
   int j_used_bitmap_nodes ;
+  int j_num_lists;      /* total number of active transactions */
+  int j_num_work_lists; /* number that need attention from kreiserfsd */
+
+  /* debugging to make sure things are flushed in order */
+  int j_last_flush_id;
+
+  /* debugging to make sure things are committed in order */
+  int j_last_commit_id;
+
   struct list_head j_bitmap_nodes ;
-  struct list_head j_dirty_buffers ;
+
+  /* list of all active transactions */
+  struct list_head j_journal_list;
+
+  /* lists that haven't been touched by kreiserfsd */
+  struct list_head j_working_list;
+
   struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS] ;	/* array of bitmaps to record the deleted blocks */
-  struct reiserfs_journal_list j_journal_list[JOURNAL_LIST_COUNT] ;	    /* array of all the journal lists */
   struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE] ; 	    /* hash table for real buffer heads in current trans */ 
   struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE] ; /* hash table for all the real buffer heads in all 
   										the transactions */
@@ -420,11 +449,12 @@ struct reiserfs_sb_info
 #define REISERFS_3_6 1
 
 /* Mount options */
-#define REISERFS_LARGETAIL 0  /* large tails will be created in a session */
-#define REISERFS_SMALLTAIL 17  /* small (for files less than block size) tails will be created in a session */
-#define REPLAYONLY 3 /* replay journal and return 0. Use by fsck */
-#define REISERFS_NOLOG 4      /* -o nolog: turn journalling off */
-#define REISERFS_CONVERT 5    /* -o conv: causes conversion of old
+enum {
+    REISERFS_LARGETAIL, /* large tails will be created in a session */
+    REISERFS_SMALLTAIL, /* small (for files less than block size) tails will be created in a session */
+    REPLAYONLY,          /* replay journal and return 0. Used by fsck */
+    REISERFS_NOLOG,      /* -o nolog: turn journalling off */
+    REISERFS_CONVERT,    /* -o conv: causes conversion of old
                                  format super block to the new
                                  format. If not specified - old
                                  partition will be dealt with in a
@@ -438,27 +468,25 @@ struct reiserfs_sb_info
 ** the existing hash on the FS, so if you have a tea hash disk, and mount
 ** with -o hash=rupasov, the mount will fail.
 */
-#define FORCE_TEA_HASH 6      /* try to force tea hash on mount */
-#define FORCE_RUPASOV_HASH 7  /* try to force rupasov hash on mount */
-#define FORCE_R5_HASH 8       /* try to force rupasov hash on mount */
-#define FORCE_HASH_DETECT 9   /* try to detect hash function on mount */
+    FORCE_TEA_HASH,       /* try to force tea hash on mount */
+    FORCE_RUPASOV_HASH,   /* try to force rupasov hash on mount */
+    FORCE_R5_HASH,        /* try to force rupasov hash on mount */
+    FORCE_HASH_DETECT,    /* try to detect hash function on mount */
 
 
 /* used for testing experimental features, makes benchmarking new
    features with and without more convenient, should never be used by
    users in any code shipped to users (ideally) */
 
-#define REISERFS_NO_BORDER 11
-#define REISERFS_NO_UNHASHED_RELOCATION 12
-#define REISERFS_HASHED_RELOCATION 13
-#define REISERFS_TEST4 14 
-
-#define REISERFS_TEST1 11
-#define REISERFS_TEST2 12
-#define REISERFS_TEST3 13
-#define REISERFS_TEST4 14 
-
-#define REISERFS_ATTRS (15)
+    REISERFS_NO_BORDER,
+    REISERFS_NO_UNHASHED_RELOCATION,
+    REISERFS_HASHED_RELOCATION,
+    REISERFS_DATA_LOG,
+    REISERFS_DATA_ORDERED,
+    REISERFS_DATA_WRITEBACK,
+    REISERFS_ATTRS,
+    REISERFS_TEST4,
+};
 
 #define reiserfs_r5_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_R5_HASH))
 #define reiserfs_rupasov_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_RUPASOV_HASH))
@@ -467,6 +495,9 @@ struct reiserfs_sb_info
 #define reiserfs_no_border(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NO_BORDER))
 #define reiserfs_no_unhashed_relocation(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NO_UNHASHED_RELOCATION))
 #define reiserfs_hashed_relocation(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_HASHED_RELOCATION))
+#define reiserfs_data_log(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_DATA_LOG))
+#define reiserfs_data_ordered(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_DATA_ORDERED))
+#define reiserfs_data_writeback(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_DATA_WRITEBACK))
 #define reiserfs_test4(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_TEST4))
 
 #define have_large_tails(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_LARGETAIL))
@@ -480,7 +511,6 @@ struct reiserfs_sb_info
 
 void reiserfs_file_buffer (struct buffer_head * bh, int list);
 int reiserfs_is_super(struct super_block *s)  ;
-int journal_mark_dirty(struct reiserfs_transaction_handle *, struct super_block *, struct buffer_head *bh) ;
 int show_reiserfs_locks(void) ;
 int reiserfs_resize(struct super_block *, unsigned long) ;
 
@@ -491,8 +521,6 @@ int reiserfs_resize(struct super_block *
 #define SB_BUFFER_WITH_SB(s) ((s)->u.reiserfs_sb.s_sbh)
 #define SB_JOURNAL(s) ((s)->u.reiserfs_sb.s_journal)
 #define SB_JOURNAL_1st_RESERVED_BLOCK(s) (SB_JOURNAL(s)->j_1st_reserved_block)
-#define SB_JOURNAL_LIST(s) (SB_JOURNAL(s)->j_journal_list)
-#define SB_JOURNAL_LIST_INDEX(s) (SB_JOURNAL(s)->j_journal_list_index) 
 #define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free) 
 #define SB_AP_BITMAP(s) ((s)->u.reiserfs_sb.s_ap_bitmap)
 
diff -rupBb linux-2.4.24.orig/kernel/ksyms.c linux-2.4.24.new/kernel/ksyms.c
--- linux-2.4.24.orig/kernel/ksyms.c	Tue Mar 16 13:43:29 2004
+++ linux-2.4.24.new/kernel/ksyms.c	Tue Mar 16 13:17:18 2004
@@ -172,6 +172,7 @@ EXPORT_SYMBOL(d_alloc);
 EXPORT_SYMBOL(d_lookup);
 EXPORT_SYMBOL(__d_path);
 EXPORT_SYMBOL(mark_buffer_dirty);
+EXPORT_SYMBOL(discard_buffer);      /* for FS flushpage funcs */
 EXPORT_SYMBOL(set_buffer_async_io); /* for reiserfs_writepage */
 EXPORT_SYMBOL(__mark_buffer_dirty);
 EXPORT_SYMBOL(__mark_inode_dirty);
