diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 2d0bcb479075..e5252c4d95c9 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -1241,6 +1241,36 @@ TRACE_EVENT(xfs_alloc_busysearch, __print_symbolic(__entry->found, XFS_BUSY_STATES)) ); +TRACE_EVENT(xfs_alloc_busy_trim, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t agbno, xfs_extlen_t len, + xfs_agblock_t tbno, xfs_extlen_t tlen), + TP_ARGS(mp, agno, agbno, len, tbno, tlen), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, len) + __field(xfs_agblock_t, tbno) + __field(xfs_extlen_t, tlen) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->agbno = agbno; + __entry->len = len; + __entry->tbno = tbno; + __entry->tlen = tlen; + ), + TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __entry->len, + __entry->tbno, + __entry->tlen) +); + TRACE_EVENT(xfs_trans_commit_lsn, TP_PROTO(struct xfs_trans *trans), TP_ARGS(trans), @@ -1433,11 +1463,14 @@ DEFINE_ALLOC_EVENT(xfs_alloc_near_first); DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); DEFINE_ALLOC_EVENT(xfs_alloc_near_error); +DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry); +DEFINE_ALLOC_EVENT(xfs_alloc_near_busy); DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); DEFINE_ALLOC_EVENT(xfs_alloc_size_done); DEFINE_ALLOC_EVENT(xfs_alloc_size_error); +DEFINE_ALLOC_EVENT(xfs_alloc_size_busy); DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); DEFINE_ALLOC_EVENT(xfs_alloc_small_done); diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 37c4952c2f53..ff54ef428348 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -41,19 +41,13 @@ #define XFSA_FIXUP_BNO_OK 1 #define XFSA_FIXUP_CNT_OK 2 -/* - * Prototypes for per-ag allocation routines - */ - STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, - xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); - -/* - * Internal functions. - */ + xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); +STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *, + xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *); /* * Lookup the record equal to [bno, len] in the btree given by cur. @@ -154,19 +148,21 @@ xfs_alloc_compute_aligned( xfs_extlen_t *reslen) /* result length */ { xfs_agblock_t bno; - xfs_extlen_t diff; xfs_extlen_t len; - if (args->alignment > 1 && foundlen >= args->minlen) { - bno = roundup(foundbno, args->alignment); - diff = bno - foundbno; - len = diff >= foundlen ? 0 : foundlen - diff; + /* Trim busy sections out of found extent */ + xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len); + + if (args->alignment > 1 && len >= args->minlen) { + xfs_agblock_t aligned_bno = roundup(bno, args->alignment); + xfs_extlen_t diff = aligned_bno - bno; + + *resbno = aligned_bno; + *reslen = diff >= len ? 0 : len - diff; } else { - bno = foundbno; - len = foundlen; + *resbno = bno; + *reslen = len; } - *resbno = bno; - *reslen = len; } /* @@ -541,16 +537,8 @@ xfs_alloc_ag_vextent( if (error) return error; - /* - * Search the busylist for these blocks and mark the - * transaction as synchronous if blocks are found. This - * avoids the need to block due to a synchronous log - * force to ensure correct ordering as the synchronous - * transaction will guarantee that for us. - */ - if (xfs_alloc_busy_search(args->mp, args->agno, - args->agbno, args->len)) - xfs_trans_set_sync(args->tp); + ASSERT(!xfs_alloc_busy_search(args->mp, args->agno, + args->agbno, args->len)); } if (!args->isfl) { @@ -577,14 +565,14 @@ xfs_alloc_ag_vextent_exact( { xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ - xfs_agblock_t end; /* end of allocated extent */ int error; xfs_agblock_t fbno; /* start block of found extent */ - xfs_agblock_t fend; /* end block of found extent */ xfs_extlen_t flen; /* length of found extent */ + xfs_agblock_t tbno; /* start block of trimmed extent */ + xfs_extlen_t tlen; /* length of trimmed extent */ + xfs_agblock_t tend; /* end block of trimmed extent */ + xfs_agblock_t end; /* end of allocated extent */ int i; /* success/failure of operation */ - xfs_agblock_t maxend; /* end of maximal extent */ - xfs_agblock_t minend; /* end of minimal extent */ xfs_extlen_t rlen; /* length of returned extent */ ASSERT(args->alignment == 1); @@ -614,14 +602,22 @@ xfs_alloc_ag_vextent_exact( goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); ASSERT(fbno <= args->agbno); - minend = args->agbno + args->minlen; - maxend = args->agbno + args->maxlen; - fend = fbno + flen; /* - * Give up if the freespace isn't long enough for the minimum request. + * Check for overlapping busy extents. */ - if (fend < minend) + xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen); + + /* + * Give up if the start of the extent is busy, or the freespace isn't + * long enough for the minimum request. + */ + if (tbno > args->agbno) + goto not_found; + if (tlen < args->minlen) + goto not_found; + tend = tbno + tlen; + if (tend < args->agbno + args->minlen) goto not_found; /* @@ -630,14 +626,14 @@ xfs_alloc_ag_vextent_exact( * * Fix the length according to mod and prod if given. */ - end = XFS_AGBLOCK_MIN(fend, maxend); + end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen); args->len = end - args->agbno; xfs_alloc_fix_len(args); if (!xfs_alloc_fix_minleft(args)) goto not_found; rlen = args->len; - ASSERT(args->agbno + rlen <= fend); + ASSERT(args->agbno + rlen <= tend); end = args->agbno + rlen; /* @@ -686,11 +682,11 @@ xfs_alloc_find_best_extent( struct xfs_btree_cur **scur, /* searching cursor */ xfs_agblock_t gdiff, /* difference for search comparison */ xfs_agblock_t *sbno, /* extent found by search */ - xfs_extlen_t *slen, - xfs_extlen_t *slena, /* aligned length */ + xfs_extlen_t *slen, /* extent length */ + xfs_agblock_t *sbnoa, /* aligned extent found by search */ + xfs_extlen_t *slena, /* aligned extent length */ int dir) /* 0 = search right, 1 = search left */ { - xfs_agblock_t bno; xfs_agblock_t new; xfs_agblock_t sdiff; int error; @@ -708,16 +704,16 @@ xfs_alloc_find_best_extent( if (error) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - xfs_alloc_compute_aligned(args, *sbno, *slen, &bno, slena); + xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); /* * The good extent is closer than this one. */ if (!dir) { - if (bno >= args->agbno + gdiff) + if (*sbnoa >= args->agbno + gdiff) goto out_use_good; } else { - if (bno <= args->agbno - gdiff) + if (*sbnoa <= args->agbno - gdiff) goto out_use_good; } @@ -729,8 +725,8 @@ xfs_alloc_find_best_extent( xfs_alloc_fix_len(args); sdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, *sbno, - *slen, &new); + args->alignment, *sbnoa, + *slena, &new); /* * Choose closer size and invalidate other cursor. @@ -780,7 +776,7 @@ xfs_alloc_ag_vextent_near( xfs_agblock_t gtbnoa; /* aligned ... */ xfs_extlen_t gtdiff; /* difference to right side entry */ xfs_extlen_t gtlen; /* length of right side entry */ - xfs_extlen_t gtlena = 0; /* aligned ... */ + xfs_extlen_t gtlena; /* aligned ... */ xfs_agblock_t gtnew; /* useful start bno of right side */ int error; /* error code */ int i; /* result code, temporary */ @@ -789,9 +785,10 @@ xfs_alloc_ag_vextent_near( xfs_agblock_t ltbnoa; /* aligned ... */ xfs_extlen_t ltdiff; /* difference to left side entry */ xfs_extlen_t ltlen; /* length of left side entry */ - xfs_extlen_t ltlena = 0; /* aligned ... */ + xfs_extlen_t ltlena; /* aligned ... */ xfs_agblock_t ltnew; /* useful start bno of left side */ xfs_extlen_t rlen; /* length of returned extent */ + int forced = 0; #if defined(DEBUG) && defined(__KERNEL__) /* * Randomly don't execute the first algorithm. @@ -800,13 +797,20 @@ xfs_alloc_ag_vextent_near( dofirst = random32() & 1; #endif + +restart: + bno_cur_lt = NULL; + bno_cur_gt = NULL; + ltlen = 0; + gtlena = 0; + ltlena = 0; + /* * Get a cursor for the by-size btree. */ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, args->agno, XFS_BTNUM_CNT); - ltlen = 0; - bno_cur_lt = bno_cur_gt = NULL; + /* * See if there are any free extents as big as maxlen. */ @@ -822,11 +826,13 @@ xfs_alloc_ag_vextent_near( goto error0; if (i == 0 || ltlen == 0) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + trace_xfs_alloc_near_noentry(args); return 0; } ASSERT(i == 1); } args->wasfromfl = 0; + /* * First algorithm. * If the requested extent is large wrt the freespaces available @@ -890,7 +896,7 @@ xfs_alloc_ag_vextent_near( if (args->len < blen) continue; ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, ltbno, ltlen, <new); + args->alignment, ltbnoa, ltlena, <new); if (ltnew != NULLAGBLOCK && (args->len > blen || ltdiff < bdiff)) { bdiff = ltdiff; @@ -1042,11 +1048,12 @@ xfs_alloc_ag_vextent_near( args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); xfs_alloc_fix_len(args); ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, ltbno, ltlen, <new); + args->alignment, ltbnoa, ltlena, <new); error = xfs_alloc_find_best_extent(args, &bno_cur_lt, &bno_cur_gt, - ltdiff, >bno, >len, >lena, + ltdiff, >bno, >len, + >bnoa, >lena, 0 /* search right */); } else { ASSERT(gtlena >= args->minlen); @@ -1057,11 +1064,12 @@ xfs_alloc_ag_vextent_near( args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); xfs_alloc_fix_len(args); gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, gtbno, gtlen, >new); + args->alignment, gtbnoa, gtlena, >new); error = xfs_alloc_find_best_extent(args, &bno_cur_gt, &bno_cur_lt, - gtdiff, <bno, <len, <lena, + gtdiff, <bno, <len, + <bnoa, <lena, 1 /* search left */); } @@ -1073,6 +1081,12 @@ xfs_alloc_ag_vextent_near( * If we couldn't get anything, give up. */ if (bno_cur_lt == NULL && bno_cur_gt == NULL) { + if (!forced++) { + trace_xfs_alloc_near_busy(args); + xfs_log_force(args->mp, XFS_LOG_SYNC); + goto restart; + } + trace_xfs_alloc_size_neither(args); args->agbno = NULLAGBLOCK; return 0; @@ -1107,12 +1121,13 @@ xfs_alloc_ag_vextent_near( return 0; } rlen = args->len; - (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, - ltlen, <new); + (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, + ltbnoa, ltlena, <new); ASSERT(ltnew >= ltbno); - ASSERT(ltnew + rlen <= ltbno + ltlen); + ASSERT(ltnew + rlen <= ltbnoa + ltlena); ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); args->agbno = ltnew; + if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, ltnew, rlen, XFSA_FIXUP_BNO_OK))) goto error0; @@ -1155,26 +1170,35 @@ xfs_alloc_ag_vextent_size( int i; /* temp status variable */ xfs_agblock_t rbno; /* returned block number */ xfs_extlen_t rlen; /* length of returned extent */ + int forced = 0; +restart: /* * Allocate and initialize a cursor for the by-size btree. */ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, args->agno, XFS_BTNUM_CNT); bno_cur = NULL; + /* * Look for an entry >= maxlen+alignment-1 blocks. */ if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen + args->alignment - 1, &i))) goto error0; + /* - * If none, then pick up the last entry in the tree unless the - * tree is empty. + * If none or we have busy extents that we cannot allocate from, then + * we have to settle for a smaller extent. In the case that there are + * no large extents, this will return the last entry in the tree unless + * the tree is empty. In the case that there are only busy large + * extents, this will return the largest small extent unless there + * are no smaller extents available. */ - if (!i) { - if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno, - &flen, &i))) + if (!i || forced > 1) { + error = xfs_alloc_ag_vextent_small(args, cnt_cur, + &fbno, &flen, &i); + if (error) goto error0; if (i == 0 || flen == 0) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); @@ -1182,22 +1206,56 @@ xfs_alloc_ag_vextent_size( return 0; } ASSERT(i == 1); + xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); + } else { + /* + * Search for a non-busy extent that is large enough. + * If we are at low space, don't check, or if we fall of + * the end of the btree, turn off the busy check and + * restart. + */ + for (;;) { + error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + xfs_alloc_compute_aligned(args, fbno, flen, + &rbno, &rlen); + + if (rlen >= args->maxlen) + break; + + error = xfs_btree_increment(cnt_cur, 0, &i); + if (error) + goto error0; + if (i == 0) { + /* + * Our only valid extents must have been busy. + * Make it unbusy by forcing the log out and + * retrying. If we've been here before, forcing + * the log isn't making the extents available, + * which means they have probably been freed in + * this transaction. In that case, we have to + * give up on them and we'll attempt a minlen + * allocation the next time around. + */ + xfs_btree_del_cursor(cnt_cur, + XFS_BTREE_NOERROR); + trace_xfs_alloc_size_busy(args); + if (!forced++) + xfs_log_force(args->mp, XFS_LOG_SYNC); + goto restart; + } + } } - /* - * There's a freespace as big as maxlen+alignment-1, get it. - */ - else { - if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - } + /* * In the first case above, we got the last entry in the * by-size btree. Now we check to see if the space hits maxlen * once aligned; if not, we search left for something better. * This can't happen in the second case above. */ - xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); XFS_WANT_CORRUPTED_GOTO(rlen == 0 || (rlen <= flen && rbno + rlen <= fbno + flen), error0); @@ -1251,13 +1309,19 @@ xfs_alloc_ag_vextent_size( * Fix up the length. */ args->len = rlen; - xfs_alloc_fix_len(args); - if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) { - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - trace_xfs_alloc_size_nominleft(args); - args->agbno = NULLAGBLOCK; - return 0; + if (rlen < args->minlen) { + if (!forced++) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + trace_xfs_alloc_size_busy(args); + xfs_log_force(args->mp, XFS_LOG_SYNC); + goto restart; + } + goto out_nominleft; } + xfs_alloc_fix_len(args); + + if (!xfs_alloc_fix_minleft(args)) + goto out_nominleft; rlen = args->len; XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); /* @@ -1287,6 +1351,12 @@ error0: if (bno_cur) xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); return error; + +out_nominleft: + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + trace_xfs_alloc_size_nominleft(args); + args->agbno = NULLAGBLOCK; + return 0; } /* @@ -2650,6 +2720,185 @@ xfs_alloc_busy_search( return match; } +/* + * For a given extent [fbno, flen], search the busy extent list to find a + * subset of the extent that is not busy. If *rlen is smaller than + * args->minlen no suitable extent could be found, and the higher level + * code needs to force out the log and retry the allocation. + */ +STATIC void +xfs_alloc_busy_trim( + struct xfs_alloc_arg *args, + xfs_agblock_t bno, + xfs_extlen_t len, + xfs_agblock_t *rbno, + xfs_extlen_t *rlen) +{ + xfs_agblock_t fbno = bno; + xfs_extlen_t flen = len; + struct rb_node *rbp; + + ASSERT(flen > 0); + + spin_lock(&args->pag->pagb_lock); + rbp = args->pag->pagb_tree.rb_node; + while (rbp && flen >= args->minlen) { + struct xfs_busy_extent *busyp = + rb_entry(rbp, struct xfs_busy_extent, rb_node); + xfs_agblock_t fend = fbno + flen; + xfs_agblock_t bbno = busyp->bno; + xfs_agblock_t bend = bbno + busyp->length; + + if (fend <= bbno) { + rbp = rbp->rb_left; + continue; + } else if (fbno >= bend) { + rbp = rbp->rb_right; + continue; + } + + if (bbno <= fbno) { + /* start overlap */ + + /* + * Case 1: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +---------+ + * fbno fend + * + * Case 2: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +-------------+ + * fbno fend + * + * Case 3: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +-------------+ + * fbno fend + * + * Case 4: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +-----------------+ + * fbno fend + * + * No unbusy region in extent, return failure. + */ + if (fend <= bend) + goto fail; + + /* + * Case 5: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +----------------------+ + * fbno fend + * + * Case 6: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +--------------------------+ + * fbno fend + * + * Needs to be trimmed to: + * +-------+ + * fbno fend + */ + fbno = bend; + } else if (bend >= fend) { + /* end overlap */ + + /* + * Case 7: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +------------------+ + * fbno fend + * + * Case 8: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +--------------------------+ + * fbno fend + * + * Needs to be trimmed to: + * +-------+ + * fbno fend + */ + fend = bbno; + } else { + /* middle overlap */ + + /* + * Case 9: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +-----------------------------------+ + * fbno fend + * + * Can be trimmed to: + * +-------+ OR +-------+ + * fbno fend fbno fend + * + * Backward allocation leads to significant + * fragmentation of directories, which degrades + * directory performance, therefore we always want to + * choose the option that produces forward allocation + * patterns. + * Preferring the lower bno extent will make the next + * request use "fend" as the start of the next + * allocation; if the segment is no longer busy at + * that point, we'll get a contiguous allocation, but + * even if it is still busy, we will get a forward + * allocation. + * We try to avoid choosing the segment at "bend", + * because that can lead to the next allocation + * taking the segment at "fbno", which would be a + * backward allocation. We only use the segment at + * "fbno" if it is much larger than the current + * requested size, because in that case there's a + * good chance subsequent allocations will be + * contiguous. + */ + if (bbno - fbno >= args->maxlen) { + /* left candidate fits perfect */ + fend = bbno; + } else if (fend - bend >= args->maxlen * 4) { + /* right candidate has enough free space */ + fbno = bend; + } else if (bbno - fbno >= args->minlen) { + /* left candidate fits minimum requirement */ + fend = bbno; + } else { + goto fail; + } + } + + flen = fend - fbno; + } + spin_unlock(&args->pag->pagb_lock); + + if (fbno != bno || flen != len) { + trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, + fbno, flen); + } + *rbno = fbno; + *rlen = flen; + return; +fail: + /* + * Return a zero extent length as failure indications. All callers + * re-check if the trimmed extent satisfies the minlen requirement. + */ + spin_unlock(&args->pag->pagb_lock); + trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0); + *rbno = fbno; + *rlen = 0; +} + void xfs_alloc_busy_clear( struct xfs_mount *mp,