Merge branch 'for-3.15/drivers' of git://git.kernel.dk/linux-block

Pull block driver update from Jens Axboe:
 "On top of the core pull request, here's the pull request for the
  driver related changes for 3.15.  It contains:

   - Improvements for msi-x registration for block drivers (mtip32xx,
     skd, cciss, nvme) from Alexander Gordeev.

   - A round of cleanups and improvements for drbd from Andreas
     Gruenbacher and Rashika Kheria.

   - A round of cleanups and improvements for bcache from Kent.

   - Removal of sleep_on() and friends in DAC960, ataflop, swim3 from
     Arnd Bergmann.

   - Fix for a bug in the mtip32xx async completion code from Sam
     Bradshaw.

   - Fix for accidentally bouncing IO on 32-bit platforms with mtip32xx
     from Felipe Franciosi"

* 'for-3.15/drivers' of git://git.kernel.dk/linux-block: (103 commits)
  bcache: remove nested function usage
  bcache: Kill bucket->gc_gen
  bcache: Kill unused freelist
  bcache: Rework btree cache reserve handling
  bcache: Kill btree_io_wq
  bcache: btree locking rework
  bcache: Fix a race when freeing btree nodes
  bcache: Add a real GC_MARK_RECLAIMABLE
  bcache: Add bch_keylist_init_single()
  bcache: Improve priority_stats
  bcache: Better alloc tracepoints
  bcache: Kill dead cgroup code
  bcache: stop moving_gc marking buckets that can't be moved.
  bcache: Fix moving_pred()
  bcache: Fix moving_gc deadlocking with a foreground write
  bcache: Fix discard granularity
  bcache: Fix another bug recovering from unclean shutdown
  bcache: Fix a bug recovering from unclean shutdown
  bcache: Fix a journalling reclaim after recovery bug
  bcache: Fix a null ptr deref in journal replay
  ...
commit b33ce44299
Linus Torvalds, 2014-04-01 19:43:53 -07:00
47 changed files with 6622 additions and 6298 deletions


@ -0,0 +1,38 @@
This describes the in-kernel data structure for DRBD-9. Starting with
Linux v3.14 we are reorganizing DRBD to use this data structure.
Basic Data Structure
====================
A node has a number of DRBD resources. Each such resource has a number of
devices (aka volumes) and connections to other nodes ("peer nodes"). Each DRBD
device is represented by a block device locally.
The DRBD objects are interconnected to form a matrix as depicted below; a
drbd_peer_device object sits at each intersection between a drbd_device and a
drbd_connection:
  /--------------+---------------+.....+---------------\
  |   resource   |     device    |     |     device    |
  +--------------+---------------+.....+---------------+
  |  connection  |  peer_device  |     |  peer_device  |
  +--------------+---------------+.....+---------------+
  :              :               :     :               :
  :              :               :     :               :
  +--------------+---------------+.....+---------------+
  |  connection  |  peer_device  |     |  peer_device  |
  \--------------+---------------+.....+---------------/
In this table, horizontally, devices can be accessed from resources by their
volume number. Likewise, peer_devices can be accessed from connections by
their volume number. Objects in the vertical direction are connected by doubly
linked lists. There are back pointers from peer_devices to their connections
and devices, and from connections and devices to their resource.
All resources are in the drbd_resources double-linked list. In addition, all
devices can be accessed by their minor device number via the drbd_devices idr.
The drbd_resource, drbd_connection, and drbd_device objects are reference
counted. The peer_device objects only serve to establish the links between
devices and connections; their lifetime is determined by the lifetime of the
device and connection which they reference.
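
A minimal sketch of how these objects might reference each other, based only
on the description above; field names and exact types here are illustrative,
not the actual definitions from the DRBD sources:

	#include <linux/idr.h>
	#include <linux/kref.h>
	#include <linux/list.h>

	struct drbd_resource {
		struct list_head resources;	/* entry in the global drbd_resources list */
		struct idr devices;		/* volume number -> struct drbd_device */
		struct list_head connections;	/* all connections of this resource */
		struct kref kref;		/* reference counted */
	};

	struct drbd_connection {
		struct list_head connections;	/* entry in resource->connections */
		struct drbd_resource *resource;	/* back pointer */
		struct idr peer_devices;	/* volume number -> struct drbd_peer_device */
		struct kref kref;		/* reference counted */
	};

	struct drbd_device {
		struct drbd_resource *resource;	/* back pointer */
		struct list_head peer_devices;	/* one drbd_peer_device per connection */
		int vnr;			/* volume number within the resource; the
						 * device is also reachable by minor number
						 * via the global drbd_devices idr */
		struct kref kref;		/* reference counted */
	};

	/* Sits at each device/connection intersection; not reference counted,
	 * its lifetime follows the device and connection it links. */
	struct drbd_peer_device {
		struct list_head peer_devices;		/* entry in device->peer_devices */
		struct drbd_device *device;		/* back pointer */
		struct drbd_connection *connection;	/* back pointer */
	};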


@ -6411,12 +6411,12 @@ static bool DAC960_V2_ExecuteUserCommand(DAC960_Controller_T *Controller,
.ScatterGatherSegments[0]
.SegmentByteCount =
CommandMailbox->ControllerInfo.DataTransferSize;
DAC960_ExecuteCommand(Command);
while (Controller->V2.NewControllerInformation->PhysicalScanActive)
{
DAC960_ExecuteCommand(Command);
sleep_on_timeout(&Controller->CommandWaitQueue, HZ);
}
while (1) {
DAC960_ExecuteCommand(Command);
if (!Controller->V2.NewControllerInformation->PhysicalScanActive)
break;
msleep(1000);
}
DAC960_UserCritical("Discovery Completed\n", Controller);
}
}
@ -7035,18 +7035,16 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
ErrorCode = -EFAULT;
break;
}
while (Controller->V2.HealthStatusBuffer->StatusChangeCounter
== HealthStatusBuffer.StatusChangeCounter &&
Controller->V2.HealthStatusBuffer->NextEventSequenceNumber
== HealthStatusBuffer.NextEventSequenceNumber)
{
interruptible_sleep_on_timeout(&Controller->HealthStatusWaitQueue,
DAC960_MonitoringTimerInterval);
if (signal_pending(current)) {
ErrorCode = -EINTR;
break;
}
}
ErrorCode = wait_event_interruptible_timeout(Controller->HealthStatusWaitQueue,
!(Controller->V2.HealthStatusBuffer->StatusChangeCounter
== HealthStatusBuffer.StatusChangeCounter &&
Controller->V2.HealthStatusBuffer->NextEventSequenceNumber
== HealthStatusBuffer.NextEventSequenceNumber),
DAC960_MonitoringTimerInterval);
if (ErrorCode == -ERESTARTSYS) {
ErrorCode = -EINTR;
break;
}
if (copy_to_user(GetHealthStatus.HealthStatusBuffer,
Controller->V2.HealthStatusBuffer,
sizeof(DAC960_V2_HealthStatusBuffer_T)))
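
The same conversion pattern in isolation: an open-coded
interruptible_sleep_on_timeout() loop with its own signal_pending() check can
be collapsed into one wait_event_interruptible_timeout() call, which re-tests
the condition after every wakeup and reports a pending signal as -ERESTARTSYS.
A sketch with made-up names (my_wq, my_condition); the DAC960 hunk above is
the real conversion:

	#include <linux/jiffies.h>
	#include <linux/wait.h>

	static DECLARE_WAIT_QUEUE_HEAD(my_wq);	/* hypothetical wait queue */
	static int my_condition;		/* hypothetical state flag */

	static long wait_for_my_condition(void)
	{
		/* Sleep until my_condition is true, a signal arrives, or HZ
		 * jiffies pass.  Unlike the sleep_on() family, the condition
		 * is checked before sleeping and again after each wakeup, so
		 * a wakeup that races with the check cannot be lost. */
		long ret = wait_event_interruptible_timeout(my_wq, my_condition, HZ);

		if (ret == -ERESTARTSYS)	/* interrupted by a signal */
			return -EINTR;
		return ret;			/* 0 = timed out, >0 = condition met */
	}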


@ -68,6 +68,8 @@
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/mutex.h>
#include <linux/completion.h>
#include <linux/wait.h>
#include <asm/atafd.h>
#include <asm/atafdreg.h>
@ -301,7 +303,7 @@ module_param_array(UserSteprate, int, NULL, 0);
/* Synchronization of FDC access. */
static volatile int fdc_busy = 0;
static DECLARE_WAIT_QUEUE_HEAD(fdc_wait);
static DECLARE_WAIT_QUEUE_HEAD(format_wait);
static DECLARE_COMPLETION(format_wait);
static unsigned long changed_floppies = 0xff, fake_change = 0;
#define CHECK_CHANGE_DELAY HZ/2
@ -608,7 +610,7 @@ static void fd_error( void )
if (IsFormatting) {
IsFormatting = 0;
FormatError = 1;
wake_up( &format_wait );
complete(&format_wait);
return;
}
@ -650,9 +652,8 @@ static int do_format(int drive, int type, struct atari_format_descr *desc)
DPRINT(("do_format( dr=%d tr=%d he=%d offs=%d )\n",
drive, desc->track, desc->head, desc->sect_offset ));
wait_event(fdc_wait, cmpxchg(&fdc_busy, 0, 1) == 0);
local_irq_save(flags);
while( fdc_busy ) sleep_on( &fdc_wait );
fdc_busy = 1;
stdma_lock(floppy_irq, NULL);
atari_turnon_irq( IRQ_MFP_FDC ); /* should be already, just to be sure */
local_irq_restore(flags);
@ -706,7 +707,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc)
ReqSide = desc->head;
do_fd_action( drive );
sleep_on( &format_wait );
wait_for_completion(&format_wait);
redo_fd_request();
return( FormatError ? -EIO : 0 );
@ -1229,7 +1230,7 @@ static void fd_writetrack_done( int status )
goto err_end;
}
wake_up( &format_wait );
complete(&format_wait);
return;
err_end:
@ -1497,8 +1498,7 @@ repeat:
void do_fd_request(struct request_queue * q)
{
DPRINT(("do_fd_request for pid %d\n",current->pid));
while( fdc_busy ) sleep_on( &fdc_wait );
fdc_busy = 1;
wait_event(fdc_wait, cmpxchg(&fdc_busy, 0, 1) == 0);
stdma_lock(floppy_irq, NULL);
atari_disable_irq( IRQ_MFP_FDC );
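
Both idioms from the ataflop conversion above, shown standalone with
placeholder names: a completion replaces a wake_up()/sleep_on() pair for a
one-shot "operation finished" event, and cmpxchg() inside wait_event() turns
the old racy "while (busy) sleep_on()" into an atomic test-and-set that cannot
miss a wakeup:

	#include <linux/atomic.h>
	#include <linux/completion.h>
	#include <linux/wait.h>

	static DECLARE_COMPLETION(op_done);		/* placeholder names */
	static DECLARE_WAIT_QUEUE_HEAD(lock_wait);
	static volatile int lock_busy;

	static void op_finished(void)			/* completion/irq side */
	{
		complete(&op_done);			/* was: wake_up(...) */
	}

	static void start_op_and_wait(void)		/* submitter side */
	{
		/* ... kick off the operation here ... */
		wait_for_completion(&op_done);		/* was: sleep_on(...) */
	}

	static void acquire_controller(void)
	{
		/* cmpxchg() makes the test-and-set of the busy flag atomic;
		 * wait_event() re-checks it after every wakeup. */
		wait_event(lock_wait, cmpxchg(&lock_busy, 0, 1) == 0);
	}

	static void release_controller(void)
	{
		lock_busy = 0;
		wake_up(&lock_wait);
	}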


@ -4092,11 +4092,9 @@ static void cciss_interrupt_mode(ctlr_info_t *h)
if (err > 0) {
dev_warn(&h->pdev->dev,
"only %d MSI-X vectors available\n", err);
goto default_int_mode;
} else {
dev_warn(&h->pdev->dev,
"MSI-X init failed %d\n", err);
goto default_int_mode;
}
}
if (pci_find_capability(h->pdev, PCI_CAP_ID_MSI)) {
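
For context on the MSI-X hunks: Alexander Gordeev's series generally moves
these drivers toward the pci_enable_msix_range()-style API, which either
allocates between minvec and maxvec vectors or fails with a negative errno,
instead of returning a positive "retry with fewer vectors" count as the old
pci_enable_msix() did.  A hedged sketch with placeholder names (my_setup_msix,
MY_MAX_VECS); the exact calls in each driver of this pull may differ:

	#include <linux/pci.h>

	#define MY_MAX_VECS 4	/* illustrative vector count */

	static int my_setup_msix(struct pci_dev *pdev, struct msix_entry *entries)
	{
		int i, nvec;

		for (i = 0; i < MY_MAX_VECS; i++)
			entries[i].entry = i;

		/* Accept anywhere from 1 to MY_MAX_VECS vectors; the return
		 * value is the number actually allocated, or a negative errno. */
		nvec = pci_enable_msix_range(pdev, entries, 1, MY_MAX_VECS);
		if (nvec < 0)
			dev_warn(&pdev->dev, "MSI-X init failed %d\n", nvec);

		return nvec;	/* caller falls back to MSI or INTx on error */
	}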

File diff suppressed because it is too large


@ -113,54 +113,54 @@ struct drbd_bitmap {
};
#define bm_print_lock_info(m) __bm_print_lock_info(m, __func__)
static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
static void __bm_print_lock_info(struct drbd_device *device, const char *func)
{
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
if (!__ratelimit(&drbd_ratelimit_state))
return;
dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n",
drbd_task_to_thread_name(mdev->tconn, current),
func, b->bm_why ?: "?",
drbd_task_to_thread_name(mdev->tconn, b->bm_task));
drbd_err(device, "FIXME %s[%d] in %s, bitmap locked for '%s' by %s[%d]\n",
current->comm, task_pid_nr(current),
func, b->bm_why ?: "?",
b->bm_task->comm, task_pid_nr(b->bm_task));
}
void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
void drbd_bm_lock(struct drbd_device *device, char *why, enum bm_flag flags)
{
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
int trylock_failed;
if (!b) {
dev_err(DEV, "FIXME no bitmap in drbd_bm_lock!?\n");
drbd_err(device, "FIXME no bitmap in drbd_bm_lock!?\n");
return;
}
trylock_failed = !mutex_trylock(&b->bm_change);
if (trylock_failed) {
dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
drbd_task_to_thread_name(mdev->tconn, current),
why, b->bm_why ?: "?",
drbd_task_to_thread_name(mdev->tconn, b->bm_task));
drbd_warn(device, "%s[%d] going to '%s' but bitmap already locked for '%s' by %s[%d]\n",
current->comm, task_pid_nr(current),
why, b->bm_why ?: "?",
b->bm_task->comm, task_pid_nr(b->bm_task));
mutex_lock(&b->bm_change);
}
if (BM_LOCKED_MASK & b->bm_flags)
dev_err(DEV, "FIXME bitmap already locked in bm_lock\n");
drbd_err(device, "FIXME bitmap already locked in bm_lock\n");
b->bm_flags |= flags & BM_LOCKED_MASK;
b->bm_why = why;
b->bm_task = current;
}
void drbd_bm_unlock(struct drbd_conf *mdev)
void drbd_bm_unlock(struct drbd_device *device)
{
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
if (!b) {
dev_err(DEV, "FIXME no bitmap in drbd_bm_unlock!?\n");
drbd_err(device, "FIXME no bitmap in drbd_bm_unlock!?\n");
return;
}
if (!(BM_LOCKED_MASK & mdev->bitmap->bm_flags))
dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n");
if (!(BM_LOCKED_MASK & device->bitmap->bm_flags))
drbd_err(device, "FIXME bitmap not locked in bm_unlock\n");
b->bm_flags &= ~BM_LOCKED_MASK;
b->bm_why = NULL;
@ -211,19 +211,19 @@ static unsigned long bm_page_to_idx(struct page *page)
/* As is very unlikely that the same page is under IO from more than one
* context, we can get away with a bit per page and one wait queue per bitmap.
*/
static void bm_page_lock_io(struct drbd_conf *mdev, int page_nr)
static void bm_page_lock_io(struct drbd_device *device, int page_nr)
{
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
void *addr = &page_private(b->bm_pages[page_nr]);
wait_event(b->bm_io_wait, !test_and_set_bit(BM_PAGE_IO_LOCK, addr));
}
static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr)
static void bm_page_unlock_io(struct drbd_device *device, int page_nr)
{
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
void *addr = &page_private(b->bm_pages[page_nr]);
clear_bit_unlock(BM_PAGE_IO_LOCK, addr);
wake_up(&mdev->bitmap->bm_io_wait);
wake_up(&device->bitmap->bm_io_wait);
}
/* set _before_ submit_io, so it may be reset due to being changed
@ -242,22 +242,22 @@ static void bm_set_page_need_writeout(struct page *page)
/**
* drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout
* @mdev: DRBD device.
* @device: DRBD device.
* @page_nr: the bitmap page to mark with the "hint" flag
*
* From within an activity log transaction, we mark a few pages with these
* hints, then call drbd_bm_write_hinted(), which will only write out changed
* pages which are flagged with this mark.
*/
void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr)
void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr)
{
struct page *page;
if (page_nr >= mdev->bitmap->bm_number_of_pages) {
dev_warn(DEV, "BAD: page_nr: %u, number_of_pages: %u\n",
page_nr, (int)mdev->bitmap->bm_number_of_pages);
if (page_nr >= device->bitmap->bm_number_of_pages) {
drbd_warn(device, "BAD: page_nr: %u, number_of_pages: %u\n",
page_nr, (int)device->bitmap->bm_number_of_pages);
return;
}
page = mdev->bitmap->bm_pages[page_nr];
page = device->bitmap->bm_pages[page_nr];
set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page));
}
@ -340,7 +340,7 @@ static void bm_unmap(unsigned long *p_addr)
/*
* actually most functions herein should take a struct drbd_bitmap*, not a
* struct drbd_conf*, but for the debug macros I like to have the mdev around
* struct drbd_device*, but for the debug macros I like to have the device around
* to be able to report device specific.
*/
@ -436,11 +436,11 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
/*
* called on driver init only. TODO call when a device is created.
* allocates the drbd_bitmap, and stores it in mdev->bitmap.
* allocates the drbd_bitmap, and stores it in device->bitmap.
*/
int drbd_bm_init(struct drbd_conf *mdev)
int drbd_bm_init(struct drbd_device *device)
{
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
WARN_ON(b != NULL);
b = kzalloc(sizeof(struct drbd_bitmap), GFP_KERNEL);
if (!b)
@ -449,28 +449,28 @@ int drbd_bm_init(struct drbd_conf *mdev)
mutex_init(&b->bm_change);
init_waitqueue_head(&b->bm_io_wait);
mdev->bitmap = b;
device->bitmap = b;
return 0;
}
sector_t drbd_bm_capacity(struct drbd_conf *mdev)
sector_t drbd_bm_capacity(struct drbd_device *device)
{
if (!expect(mdev->bitmap))
if (!expect(device->bitmap))
return 0;
return mdev->bitmap->bm_dev_capacity;
return device->bitmap->bm_dev_capacity;
}
/* called on driver unload. TODO: call when a device is destroyed.
*/
void drbd_bm_cleanup(struct drbd_conf *mdev)
void drbd_bm_cleanup(struct drbd_device *device)
{
if (!expect(mdev->bitmap))
if (!expect(device->bitmap))
return;
bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags));
kfree(mdev->bitmap);
mdev->bitmap = NULL;
bm_free_pages(device->bitmap->bm_pages, device->bitmap->bm_number_of_pages);
bm_vk_free(device->bitmap->bm_pages, (BM_P_VMALLOCED & device->bitmap->bm_flags));
kfree(device->bitmap);
device->bitmap = NULL;
}
/*
@ -631,9 +631,9 @@ static u64 drbd_md_on_disk_bits(struct drbd_backing_dev *ldev)
* In case this is actually a resize, we copy the old bitmap into the new one.
* Otherwise, the bitmap is initialized to all bits set.
*/
int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bits)
{
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
unsigned long bits, words, owords, obits;
unsigned long want, have, onpages; /* number of pages */
struct page **npages, **opages = NULL;
@ -643,9 +643,9 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
if (!expect(b))
return -ENOMEM;
drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK);
drbd_bm_lock(device, "resize", BM_LOCKED_MASK);
dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n",
drbd_info(device, "drbd_bm_resize called with capacity == %llu\n",
(unsigned long long)capacity);
if (capacity == b->bm_dev_capacity)
@ -678,12 +678,12 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
*/
words = ALIGN(bits, 64) >> LN2_BPL;
if (get_ldev(mdev)) {
u64 bits_on_disk = drbd_md_on_disk_bits(mdev->ldev);
put_ldev(mdev);
if (get_ldev(device)) {
u64 bits_on_disk = drbd_md_on_disk_bits(device->ldev);
put_ldev(device);
if (bits > bits_on_disk) {
dev_info(DEV, "bits = %lu\n", bits);
dev_info(DEV, "bits_on_disk = %llu\n", bits_on_disk);
drbd_info(device, "bits = %lu\n", bits);
drbd_info(device, "bits_on_disk = %llu\n", bits_on_disk);
err = -ENOSPC;
goto out;
}
@ -692,10 +692,10 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
want = ALIGN(words*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
have = b->bm_number_of_pages;
if (want == have) {
D_ASSERT(b->bm_pages != NULL);
D_ASSERT(device, b->bm_pages != NULL);
npages = b->bm_pages;
} else {
if (drbd_insert_fault(mdev, DRBD_FAULT_BM_ALLOC))
if (drbd_insert_fault(device, DRBD_FAULT_BM_ALLOC))
npages = NULL;
else
npages = bm_realloc_pages(b, want);
@ -742,10 +742,10 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
bm_vk_free(opages, opages_vmalloced);
if (!growing)
b->bm_set = bm_count_bits(b);
dev_info(DEV, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);
drbd_info(device, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);
out:
drbd_bm_unlock(mdev);
drbd_bm_unlock(device);
return err;
}
@ -757,9 +757,9 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
*
* maybe bm_set should be atomic_t ?
*/
unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
unsigned long _drbd_bm_total_weight(struct drbd_device *device)
{
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
unsigned long s;
unsigned long flags;
@ -775,20 +775,20 @@ unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
return s;
}
unsigned long drbd_bm_total_weight(struct drbd_conf *mdev)
unsigned long drbd_bm_total_weight(struct drbd_device *device)
{
unsigned long s;
/* if I don't have a disk, I don't know about out-of-sync status */
if (!get_ldev_if_state(mdev, D_NEGOTIATING))
if (!get_ldev_if_state(device, D_NEGOTIATING))
return 0;
s = _drbd_bm_total_weight(mdev);
put_ldev(mdev);
s = _drbd_bm_total_weight(device);
put_ldev(device);
return s;
}
size_t drbd_bm_words(struct drbd_conf *mdev)
size_t drbd_bm_words(struct drbd_device *device)
{
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
if (!expect(b))
return 0;
if (!expect(b->bm_pages))
@ -797,9 +797,9 @@ size_t drbd_bm_words(struct drbd_conf *mdev)
return b->bm_words;
}
unsigned long drbd_bm_bits(struct drbd_conf *mdev)
unsigned long drbd_bm_bits(struct drbd_device *device)
{
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
if (!expect(b))
return 0;
@ -811,10 +811,10 @@ unsigned long drbd_bm_bits(struct drbd_conf *mdev)
* bitmap must be locked by drbd_bm_lock.
* currently only used from receive_bitmap.
*/
void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,
void drbd_bm_merge_lel(struct drbd_device *device, size_t offset, size_t number,
unsigned long *buffer)
{
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
unsigned long *p_addr, *bm;
unsigned long word, bits;
unsigned int idx;
@ -860,10 +860,10 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,
/* copy number words from the bitmap starting at offset into the buffer.
* buffer[i] will be little endian unsigned long.
*/
void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
void drbd_bm_get_lel(struct drbd_device *device, size_t offset, size_t number,
unsigned long *buffer)
{
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
unsigned long *p_addr, *bm;
size_t end, do_now;
@ -878,7 +878,7 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
if ((offset >= b->bm_words) ||
(end > b->bm_words) ||
(number <= 0))
dev_err(DEV, "offset=%lu number=%lu bm_words=%lu\n",
drbd_err(device, "offset=%lu number=%lu bm_words=%lu\n",
(unsigned long) offset,
(unsigned long) number,
(unsigned long) b->bm_words);
@ -897,9 +897,9 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
}
/* set all bits in the bitmap */
void drbd_bm_set_all(struct drbd_conf *mdev)
void drbd_bm_set_all(struct drbd_device *device)
{
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
if (!expect(b))
return;
if (!expect(b->bm_pages))
@ -913,9 +913,9 @@ void drbd_bm_set_all(struct drbd_conf *mdev)
}
/* clear all bits in the bitmap */
void drbd_bm_clear_all(struct drbd_conf *mdev)
void drbd_bm_clear_all(struct drbd_device *device)
{
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
if (!expect(b))
return;
if (!expect(b->bm_pages))
@ -928,7 +928,7 @@ void drbd_bm_clear_all(struct drbd_conf *mdev)
}
struct bm_aio_ctx {
struct drbd_conf *mdev;
struct drbd_device *device;
atomic_t in_flight;
unsigned int done;
unsigned flags;
@ -943,7 +943,7 @@ static void bm_aio_ctx_destroy(struct kref *kref)
{
struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref);
put_ldev(ctx->mdev);
put_ldev(ctx->device);
kfree(ctx);
}
@ -951,8 +951,8 @@ static void bm_aio_ctx_destroy(struct kref *kref)
static void bm_async_io_complete(struct bio *bio, int error)
{
struct bm_aio_ctx *ctx = bio->bi_private;
struct drbd_conf *mdev = ctx->mdev;
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_device *device = ctx->device;
struct drbd_bitmap *b = device->bitmap;
unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page);
int uptodate = bio_flagged(bio, BIO_UPTODATE);
@ -966,7 +966,7 @@ static void bm_async_io_complete(struct bio *bio, int error)
if ((ctx->flags & BM_AIO_COPY_PAGES) == 0 &&
!bm_test_page_unchanged(b->bm_pages[idx]))
dev_warn(DEV, "bitmap page idx %u changed during IO!\n", idx);
drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx);
if (error) {
/* ctx error will hold the completed-last non-zero error code,
@ -976,14 +976,14 @@ static void bm_async_io_complete(struct bio *bio, int error)
/* Not identical to on disk version of it.
* Is BM_PAGE_IO_ERROR enough? */
if (__ratelimit(&drbd_ratelimit_state))
dev_err(DEV, "IO ERROR %d on bitmap page idx %u\n",
drbd_err(device, "IO ERROR %d on bitmap page idx %u\n",
error, idx);
} else {
bm_clear_page_io_err(b->bm_pages[idx]);
dynamic_dev_dbg(DEV, "bitmap page idx %u completed\n", idx);
dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx);
}
bm_page_unlock_io(mdev, idx);
bm_page_unlock_io(device, idx);
if (ctx->flags & BM_AIO_COPY_PAGES)
mempool_free(bio->bi_io_vec[0].bv_page, drbd_md_io_page_pool);
@ -992,7 +992,7 @@ static void bm_async_io_complete(struct bio *bio, int error)
if (atomic_dec_and_test(&ctx->in_flight)) {
ctx->done = 1;
wake_up(&mdev->misc_wait);
wake_up(&device->misc_wait);
kref_put(&ctx->kref, &bm_aio_ctx_destroy);
}
}
@ -1000,23 +1000,23 @@ static void bm_async_io_complete(struct bio *bio, int error)
static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local)
{
struct bio *bio = bio_alloc_drbd(GFP_NOIO);
struct drbd_conf *mdev = ctx->mdev;
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_device *device = ctx->device;
struct drbd_bitmap *b = device->bitmap;
struct page *page;
unsigned int len;
sector_t on_disk_sector =
mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset;
device->ldev->md.md_offset + device->ldev->md.bm_offset;
on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);
/* this might happen with very small
* flexible external meta data device,
* or with PAGE_SIZE > 4k */
len = min_t(unsigned int, PAGE_SIZE,
(drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9);
(drbd_md_last_sector(device->ldev) - on_disk_sector + 1)<<9);
/* serialize IO on this page */
bm_page_lock_io(mdev, page_nr);
bm_page_lock_io(device, page_nr);
/* before memcpy and submit,
* so it can be redirtied any time */
bm_set_page_unchanged(b->bm_pages[page_nr]);
@ -1027,7 +1027,7 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
bm_store_page_idx(page, page_nr);
} else
page = b->bm_pages[page_nr];
bio->bi_bdev = mdev->ldev->md_bdev;
bio->bi_bdev = device->ldev->md_bdev;
bio->bi_iter.bi_sector = on_disk_sector;
/* bio_add_page of a single page to an empty bio will always succeed,
* according to api. Do we want to assert that? */
@ -1035,24 +1035,24 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
bio->bi_private = ctx;
bio->bi_end_io = bm_async_io_complete;
if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
bio->bi_rw |= rw;
bio_endio(bio, -EIO);
} else {
submit_bio(rw, bio);
/* this should not count as user activity and cause the
* resync to throttle -- see drbd_rs_should_slow_down(). */
atomic_add(len >> 9, &mdev->rs_sect_ev);
atomic_add(len >> 9, &device->rs_sect_ev);
}
}
/*
* bm_rw: read/write the whole bitmap from/to its on disk location.
*/
static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
{
struct bm_aio_ctx *ctx;
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
int num_pages, i, count = 0;
unsigned long now;
char ppb[10];
@ -1072,7 +1072,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
return -ENOMEM;
*ctx = (struct bm_aio_ctx) {
.mdev = mdev,
.device = device,
.in_flight = ATOMIC_INIT(1),
.done = 0,
.flags = flags,
@ -1080,8 +1080,8 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
.kref = { ATOMIC_INIT(2) },
};
if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */
dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n");
if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */
drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n");
kfree(ctx);
return -ENODEV;
}
@ -1106,14 +1106,14 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
if (!(flags & BM_WRITE_ALL_PAGES) &&
bm_test_page_unchanged(b->bm_pages[i])) {
dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i);
dynamic_drbd_dbg(device, "skipped bm write for idx %u\n", i);
continue;
}
/* during lazy writeout,
* ignore those pages not marked for lazy writeout. */
if (lazy_writeout_upper_idx &&
!bm_test_page_lazy_writeout(b->bm_pages[i])) {
dynamic_dev_dbg(DEV, "skipped bm lazy write for idx %u\n", i);
dynamic_drbd_dbg(device, "skipped bm lazy write for idx %u\n", i);
continue;
}
}
@ -1132,19 +1132,19 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
* "in_flight reached zero, all done" event.
*/
if (!atomic_dec_and_test(&ctx->in_flight))
wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done);
wait_until_done_or_force_detached(device, device->ldev, &ctx->done);
else
kref_put(&ctx->kref, &bm_aio_ctx_destroy);
/* summary for global bitmap IO */
if (flags == 0)
dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
drbd_info(device, "bitmap %s of %u pages took %lu jiffies\n",
rw == WRITE ? "WRITE" : "READ",
count, jiffies - now);
if (ctx->error) {
dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
drbd_alert(device, "we had at least one MD IO ERROR during bitmap IO\n");
drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
err = -EIO; /* ctx->error ? */
}
@ -1153,16 +1153,16 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
now = jiffies;
if (rw == WRITE) {
drbd_md_flush(mdev);
drbd_md_flush(device);
} else /* rw == READ */ {
b->bm_set = bm_count_bits(b);
dev_info(DEV, "recounting of set bits took additional %lu jiffies\n",
drbd_info(device, "recounting of set bits took additional %lu jiffies\n",
jiffies - now);
}
now = b->bm_set;
if (flags == 0)
dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
drbd_info(device, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
kref_put(&ctx->kref, &bm_aio_ctx_destroy);
@ -1171,48 +1171,38 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
/**
* drbd_bm_read() - Read the whole bitmap from its on disk location.
* @mdev: DRBD device.
* @device: DRBD device.
*/
int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)
int drbd_bm_read(struct drbd_device *device) __must_hold(local)
{
return bm_rw(mdev, READ, 0, 0);
return bm_rw(device, READ, 0, 0);
}
/**
* drbd_bm_write() - Write the whole bitmap to its on disk location.
* @mdev: DRBD device.
* @device: DRBD device.
*
* Will only write pages that have changed since last IO.
*/
int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
int drbd_bm_write(struct drbd_device *device) __must_hold(local)
{
return bm_rw(mdev, WRITE, 0, 0);
return bm_rw(device, WRITE, 0, 0);
}
/**
* drbd_bm_write_all() - Write the whole bitmap to its on disk location.
* @mdev: DRBD device.
* @device: DRBD device.
*
* Will write all pages.
*/
int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local)
int drbd_bm_write_all(struct drbd_device *device) __must_hold(local)
{
return bm_rw(mdev, WRITE, BM_WRITE_ALL_PAGES, 0);
}
/**
* drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed.
* @mdev: DRBD device.
* @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages
*/
int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local)
{
return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, upper_idx);
return bm_rw(device, WRITE, BM_WRITE_ALL_PAGES, 0);
}
/**
* drbd_bm_write_copy_pages() - Write the whole bitmap to its on disk location.
* @mdev: DRBD device.
* @device: DRBD device.
*
* Will only write pages that have changed since last IO.
* In contrast to drbd_bm_write(), this will copy the bitmap pages
@ -1221,23 +1211,23 @@ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(l
* verify is aborted due to a failed peer disk, while local IO continues, or
* pending resync acks are still being processed.
*/
int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local)
int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local)
{
return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0);
return bm_rw(device, WRITE, BM_AIO_COPY_PAGES, 0);
}
/**
* drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed.
* @mdev: DRBD device.
* @device: DRBD device.
*/
int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local)
int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local)
{
return bm_rw(mdev, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
return bm_rw(device, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
}
/**
* drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap
* @mdev: DRBD device.
* @device: DRBD device.
* @idx: bitmap page index
*
* We don't want to special case on logical_block_size of the backend device,
@ -1247,13 +1237,13 @@ int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local)
* In case this becomes an issue on systems with larger PAGE_SIZE,
* we may want to change this again to write 4k aligned 4k pieces.
*/
int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local)
int drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold(local)
{
struct bm_aio_ctx *ctx;
int err;
if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) {
dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx);
if (bm_test_page_unchanged(device->bitmap->bm_pages[idx])) {
dynamic_drbd_dbg(device, "skipped bm page write for idx %u\n", idx);
return 0;
}
@ -1262,7 +1252,7 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc
return -ENOMEM;
*ctx = (struct bm_aio_ctx) {
.mdev = mdev,
.device = device,
.in_flight = ATOMIC_INIT(1),
.done = 0,
.flags = BM_AIO_COPY_PAGES,
@ -1270,21 +1260,21 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc
.kref = { ATOMIC_INIT(2) },
};
if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */
dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n");
if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */
drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n");
kfree(ctx);
return -ENODEV;
}
bm_page_io_async(ctx, idx, WRITE_SYNC);
wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done);
wait_until_done_or_force_detached(device, device->ldev, &ctx->done);
if (ctx->error)
drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
/* that causes us to detach, so the in memory bitmap will be
* gone in a moment as well. */
mdev->bm_writ_cnt++;
device->bm_writ_cnt++;
err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error;
kref_put(&ctx->kref, &bm_aio_ctx_destroy);
return err;
@ -1298,17 +1288,17 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc
*
* this returns a bit number, NOT a sector!
*/
static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
static unsigned long __bm_find_next(struct drbd_device *device, unsigned long bm_fo,
const int find_zero_bit)
{
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
unsigned long *p_addr;
unsigned long bit_offset;
unsigned i;
if (bm_fo > b->bm_bits) {
dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits);
drbd_err(device, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits);
bm_fo = DRBD_END_OF_BITMAP;
} else {
while (bm_fo < b->bm_bits) {
@ -1338,10 +1328,10 @@ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
return bm_fo;
}
static unsigned long bm_find_next(struct drbd_conf *mdev,
static unsigned long bm_find_next(struct drbd_device *device,
unsigned long bm_fo, const int find_zero_bit)
{
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
unsigned long i = DRBD_END_OF_BITMAP;
if (!expect(b))
@ -1351,39 +1341,39 @@ static unsigned long bm_find_next(struct drbd_conf *mdev,
spin_lock_irq(&b->bm_lock);
if (BM_DONT_TEST & b->bm_flags)
bm_print_lock_info(mdev);
bm_print_lock_info(device);
i = __bm_find_next(mdev, bm_fo, find_zero_bit);
i = __bm_find_next(device, bm_fo, find_zero_bit);
spin_unlock_irq(&b->bm_lock);
return i;
}
unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
unsigned long drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo)
{
return bm_find_next(mdev, bm_fo, 0);
return bm_find_next(device, bm_fo, 0);
}
#if 0
/* not yet needed for anything. */
unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
unsigned long drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo)
{
return bm_find_next(mdev, bm_fo, 1);
return bm_find_next(device, bm_fo, 1);
}
#endif
/* does not spin_lock_irqsave.
* you must take drbd_bm_lock() first */
unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
unsigned long _drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo)
{
/* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
return __bm_find_next(mdev, bm_fo, 0);
/* WARN_ON(!(BM_DONT_SET & device->b->bm_flags)); */
return __bm_find_next(device, bm_fo, 0);
}
unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo)
{
/* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
return __bm_find_next(mdev, bm_fo, 1);
/* WARN_ON(!(BM_DONT_SET & device->b->bm_flags)); */
return __bm_find_next(device, bm_fo, 1);
}
/* returns number of bits actually changed.
@ -1392,10 +1382,10 @@ unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_f
* wants bitnr, not sector.
* expected to be called for only a few bits (e - s about BITS_PER_LONG).
* Must hold bitmap lock already. */
static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
static int __bm_change_bits_to(struct drbd_device *device, const unsigned long s,
unsigned long e, int val)
{
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
unsigned long *p_addr = NULL;
unsigned long bitnr;
unsigned int last_page_nr = -1U;
@ -1403,7 +1393,7 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
int changed_total = 0;
if (e >= b->bm_bits) {
dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n",
drbd_err(device, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n",
s, e, b->bm_bits);
e = b->bm_bits ? b->bm_bits -1 : 0;
}
@ -1441,11 +1431,11 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
* for val != 0, we change 0 -> 1, return code positive
* for val == 0, we change 1 -> 0, return code negative
* wants bitnr, not sector */
static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
static int bm_change_bits_to(struct drbd_device *device, const unsigned long s,
const unsigned long e, int val)
{
unsigned long flags;
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
int c = 0;
if (!expect(b))
@ -1455,24 +1445,24 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
spin_lock_irqsave(&b->bm_lock, flags);
if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
bm_print_lock_info(mdev);
bm_print_lock_info(device);
c = __bm_change_bits_to(mdev, s, e, val);
c = __bm_change_bits_to(device, s, e, val);
spin_unlock_irqrestore(&b->bm_lock, flags);
return c;
}
/* returns number of bits changed 0 -> 1 */
int drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
int drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
{
return bm_change_bits_to(mdev, s, e, 1);
return bm_change_bits_to(device, s, e, 1);
}
/* returns number of bits changed 1 -> 0 */
int drbd_bm_clear_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
int drbd_bm_clear_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
{
return -bm_change_bits_to(mdev, s, e, 0);
return -bm_change_bits_to(device, s, e, 0);
}
/* sets all bits in full words,
@ -1504,7 +1494,7 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
* You must first drbd_bm_lock().
* Can be called to set the whole bitmap in one go.
* Sets bits from s to e _inclusive_. */
void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
void _drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
{
/* First set_bit from the first bit (s)
* up to the next long boundary (sl),
@ -1514,7 +1504,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
* Do not use memset, because we must account for changes,
* so we need to loop over the words with hweight() anyways.
*/
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
unsigned long sl = ALIGN(s,BITS_PER_LONG);
unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1);
int first_page;
@ -1526,7 +1516,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
if (e - s <= 3*BITS_PER_LONG) {
/* don't bother; el and sl may even be wrong. */
spin_lock_irq(&b->bm_lock);
__bm_change_bits_to(mdev, s, e, 1);
__bm_change_bits_to(device, s, e, 1);
spin_unlock_irq(&b->bm_lock);
return;
}
@ -1537,7 +1527,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
/* bits filling the current long */
if (sl)
__bm_change_bits_to(mdev, s, sl-1, 1);
__bm_change_bits_to(device, s, sl-1, 1);
first_page = sl >> (3 + PAGE_SHIFT);
last_page = el >> (3 + PAGE_SHIFT);
@ -1549,7 +1539,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
/* first and full pages, unless first page == last page */
for (page_nr = first_page; page_nr < last_page; page_nr++) {
bm_set_full_words_within_one_page(mdev->bitmap, page_nr, first_word, last_word);
bm_set_full_words_within_one_page(device->bitmap, page_nr, first_word, last_word);
spin_unlock_irq(&b->bm_lock);
cond_resched();
first_word = 0;
@ -1565,7 +1555,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
* as we did not allocate it, it is not present in bitmap->bm_pages.
*/
if (last_word)
bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word);
bm_set_full_words_within_one_page(device->bitmap, last_page, first_word, last_word);
/* possibly trailing bits.
* example: (e & 63) == 63, el will be e+1.
@ -1573,7 +1563,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
* it would trigger an assert in __bm_change_bits_to()
*/
if (el <= e)
__bm_change_bits_to(mdev, el, e, 1);
__bm_change_bits_to(device, el, e, 1);
spin_unlock_irq(&b->bm_lock);
}
@ -1584,10 +1574,10 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
* 0 ... bit not set
* -1 ... first out of bounds access, stop testing for bits!
*/
int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
int drbd_bm_test_bit(struct drbd_device *device, const unsigned long bitnr)
{
unsigned long flags;
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
unsigned long *p_addr;
int i;
@ -1598,7 +1588,7 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
spin_lock_irqsave(&b->bm_lock, flags);
if (BM_DONT_TEST & b->bm_flags)
bm_print_lock_info(mdev);
bm_print_lock_info(device);
if (bitnr < b->bm_bits) {
p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr));
i = test_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr) ? 1 : 0;
@ -1606,7 +1596,7 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
} else if (bitnr == b->bm_bits) {
i = -1;
} else { /* (bitnr > b->bm_bits) */
dev_err(DEV, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits);
drbd_err(device, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits);
i = 0;
}
@ -1615,10 +1605,10 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
}
/* returns number of bits set in the range [s, e] */
int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
int drbd_bm_count_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
{
unsigned long flags;
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
unsigned long *p_addr = NULL;
unsigned long bitnr;
unsigned int page_nr = -1U;
@ -1635,7 +1625,7 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
spin_lock_irqsave(&b->bm_lock, flags);
if (BM_DONT_TEST & b->bm_flags)
bm_print_lock_info(mdev);
bm_print_lock_info(device);
for (bitnr = s; bitnr <= e; bitnr++) {
unsigned int idx = bm_bit_to_page_idx(b, bitnr);
if (page_nr != idx) {
@ -1647,7 +1637,7 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
if (expect(bitnr < b->bm_bits))
c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
else
dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
drbd_err(device, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
}
if (p_addr)
bm_unmap(p_addr);
@ -1670,9 +1660,9 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
* reference count of some bitmap extent element from some lru instead...
*
*/
int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr)
{
struct drbd_bitmap *b = mdev->bitmap;
struct drbd_bitmap *b = device->bitmap;
int count, s, e;
unsigned long flags;
unsigned long *p_addr, *bm;
@ -1684,7 +1674,7 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
spin_lock_irqsave(&b->bm_lock, flags);
if (BM_DONT_TEST & b->bm_flags)
bm_print_lock_info(mdev);
bm_print_lock_info(device);
s = S2W(enr);
e = min((size_t)S2W(enr+1), b->bm_words);
@ -1697,7 +1687,7 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
count += hweight_long(*bm++);
bm_unmap(p_addr);
} else {
dev_err(DEV, "start offset (%d) too large in drbd_bm_e_weight\n", s);
drbd_err(device, "start offset (%d) too large in drbd_bm_e_weight\n", s);
}
spin_unlock_irqrestore(&b->bm_lock, flags);
return count;
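
Most hunks in this file are a mechanical rename (struct drbd_conf *mdev
becomes struct drbd_device *device) combined with a switch from the
dev_err(DEV, ...) family to per-device drbd_err(device, ...) wrappers.  The
general shape such wrappers take, as an assumed sketch rather than the actual
drbd_int.h definitions (vdisk is assumed to be the device's gendisk):

	/* assumed sketch, not the real macros */
	#define drbd_printk(level, device, fmt, args...) \
		dev_printk(level, disk_to_dev((device)->vdisk), fmt, ## args)

	#define drbd_err(device, fmt, args...)   drbd_printk(KERN_ERR, device, fmt, ## args)
	#define drbd_warn(device, fmt, args...)  drbd_printk(KERN_WARNING, device, fmt, ## args)
	#define drbd_info(device, fmt, args...)  drbd_printk(KERN_INFO, device, fmt, ## args)
	#define drbd_alert(device, fmt, args...) drbd_printk(KERN_ALERT, device, fmt, ## args)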

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -46,7 +46,7 @@ const struct file_operations drbd_proc_fops = {
.release = drbd_proc_release,
};
void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
static void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
{
/* v is in kB/sec. We don't expect TiByte/sec yet. */
if (unlikely(v >= 1000000)) {
@ -66,14 +66,14 @@ void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
* [=====>..............] 33.5% (23456/123456)
* finish: 2:20:20 speed: 6,345 (6,456) K/sec
*/
static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq)
{
unsigned long db, dt, dbdt, rt, rs_left;
unsigned int res;
int i, x, y;
int stalled = 0;
drbd_get_syncer_progress(mdev, &rs_left, &res);
drbd_get_syncer_progress(device, &rs_left, &res);
x = res/50;
y = 20-x;
@ -85,21 +85,21 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
seq_printf(seq, ".");
seq_printf(seq, "] ");
if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
seq_printf(seq, "verified:");
else
seq_printf(seq, "sync'ed:");
seq_printf(seq, "%3u.%u%% ", res / 10, res % 10);
/* if more than a few GB, display in MB */
if (mdev->rs_total > (4UL << (30 - BM_BLOCK_SHIFT)))
if (device->rs_total > (4UL << (30 - BM_BLOCK_SHIFT)))
seq_printf(seq, "(%lu/%lu)M",
(unsigned long) Bit2KB(rs_left >> 10),
(unsigned long) Bit2KB(mdev->rs_total >> 10));
(unsigned long) Bit2KB(device->rs_total >> 10));
else
seq_printf(seq, "(%lu/%lu)K\n\t",
(unsigned long) Bit2KB(rs_left),
(unsigned long) Bit2KB(mdev->rs_total));
(unsigned long) Bit2KB(device->rs_total));
/* see drivers/md/md.c
* We do not want to overflow, so the order of operands and
@ -114,14 +114,14 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
* at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at
* least DRBD_SYNC_MARK_STEP time before it will be modified. */
/* ------------------------ ~18s average ------------------------ */
i = (mdev->rs_last_mark + 2) % DRBD_SYNC_MARKS;
dt = (jiffies - mdev->rs_mark_time[i]) / HZ;
i = (device->rs_last_mark + 2) % DRBD_SYNC_MARKS;
dt = (jiffies - device->rs_mark_time[i]) / HZ;
if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS))
stalled = 1;
if (!dt)
dt++;
db = mdev->rs_mark_left[i] - rs_left;
db = device->rs_mark_left[i] - rs_left;
rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */
seq_printf(seq, "finish: %lu:%02lu:%02lu",
@ -134,11 +134,11 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
/* ------------------------- ~3s average ------------------------ */
if (proc_details >= 1) {
/* this is what drbd_rs_should_slow_down() uses */
i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
dt = (jiffies - mdev->rs_mark_time[i]) / HZ;
i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
dt = (jiffies - device->rs_mark_time[i]) / HZ;
if (!dt)
dt++;
db = mdev->rs_mark_left[i] - rs_left;
db = device->rs_mark_left[i] - rs_left;
dbdt = Bit2KB(db/dt);
seq_printf_with_thousands_grouping(seq, dbdt);
seq_printf(seq, " -- ");
@ -147,34 +147,34 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
/* --------------------- long term average ---------------------- */
/* mean speed since syncer started
* we do account for PausedSync periods */
dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
if (dt == 0)
dt = 1;
db = mdev->rs_total - rs_left;
db = device->rs_total - rs_left;
dbdt = Bit2KB(db/dt);
seq_printf_with_thousands_grouping(seq, dbdt);
seq_printf(seq, ")");
if (mdev->state.conn == C_SYNC_TARGET ||
mdev->state.conn == C_VERIFY_S) {
if (device->state.conn == C_SYNC_TARGET ||
device->state.conn == C_VERIFY_S) {
seq_printf(seq, " want: ");
seq_printf_with_thousands_grouping(seq, mdev->c_sync_rate);
seq_printf_with_thousands_grouping(seq, device->c_sync_rate);
}
seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : "");
if (proc_details >= 1) {
/* 64 bit:
* we convert to sectors in the display below. */
unsigned long bm_bits = drbd_bm_bits(mdev);
unsigned long bm_bits = drbd_bm_bits(device);
unsigned long bit_pos;
unsigned long long stop_sector = 0;
if (mdev->state.conn == C_VERIFY_S ||
mdev->state.conn == C_VERIFY_T) {
bit_pos = bm_bits - mdev->ov_left;
if (verify_can_do_stop_sector(mdev))
stop_sector = mdev->ov_stop_sector;
if (device->state.conn == C_VERIFY_S ||
device->state.conn == C_VERIFY_T) {
bit_pos = bm_bits - device->ov_left;
if (verify_can_do_stop_sector(device))
stop_sector = device->ov_stop_sector;
} else
bit_pos = mdev->bm_resync_fo;
bit_pos = device->bm_resync_fo;
/* Total sectors may be slightly off for oddly
* sized devices. So what. */
seq_printf(seq,
@ -202,7 +202,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
{
int i, prev_i = -1;
const char *sn;
struct drbd_conf *mdev;
struct drbd_device *device;
struct net_conf *nc;
char wp;
@ -236,72 +236,72 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
*/
rcu_read_lock();
idr_for_each_entry(&minors, mdev, i) {
idr_for_each_entry(&drbd_devices, device, i) {
if (prev_i != i - 1)
seq_printf(seq, "\n");
prev_i = i;
sn = drbd_conn_str(mdev->state.conn);
sn = drbd_conn_str(device->state.conn);
if (mdev->state.conn == C_STANDALONE &&
mdev->state.disk == D_DISKLESS &&
mdev->state.role == R_SECONDARY) {
if (device->state.conn == C_STANDALONE &&
device->state.disk == D_DISKLESS &&
device->state.role == R_SECONDARY) {
seq_printf(seq, "%2d: cs:Unconfigured\n", i);
} else {
/* reset mdev->congestion_reason */
bdi_rw_congested(&mdev->rq_queue->backing_dev_info);
/* reset device->congestion_reason */
bdi_rw_congested(&device->rq_queue->backing_dev_info);
nc = rcu_dereference(mdev->tconn->net_conf);
nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' ';
seq_printf(seq,
"%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n"
" ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
"lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c",
i, sn,
drbd_role_str(mdev->state.role),
drbd_role_str(mdev->state.peer),
drbd_disk_str(mdev->state.disk),
drbd_disk_str(mdev->state.pdsk),
drbd_role_str(device->state.role),
drbd_role_str(device->state.peer),
drbd_disk_str(device->state.disk),
drbd_disk_str(device->state.pdsk),
wp,
drbd_suspended(mdev) ? 's' : 'r',
mdev->state.aftr_isp ? 'a' : '-',
mdev->state.peer_isp ? 'p' : '-',
mdev->state.user_isp ? 'u' : '-',
mdev->congestion_reason ?: '-',
test_bit(AL_SUSPENDED, &mdev->flags) ? 's' : '-',
mdev->send_cnt/2,
mdev->recv_cnt/2,
mdev->writ_cnt/2,
mdev->read_cnt/2,
mdev->al_writ_cnt,
mdev->bm_writ_cnt,
atomic_read(&mdev->local_cnt),
atomic_read(&mdev->ap_pending_cnt) +
atomic_read(&mdev->rs_pending_cnt),
atomic_read(&mdev->unacked_cnt),
atomic_read(&mdev->ap_bio_cnt),
mdev->tconn->epochs,
write_ordering_chars[mdev->tconn->write_ordering]
drbd_suspended(device) ? 's' : 'r',
device->state.aftr_isp ? 'a' : '-',
device->state.peer_isp ? 'p' : '-',
device->state.user_isp ? 'u' : '-',
device->congestion_reason ?: '-',
test_bit(AL_SUSPENDED, &device->flags) ? 's' : '-',
device->send_cnt/2,
device->recv_cnt/2,
device->writ_cnt/2,
device->read_cnt/2,
device->al_writ_cnt,
device->bm_writ_cnt,
atomic_read(&device->local_cnt),
atomic_read(&device->ap_pending_cnt) +
atomic_read(&device->rs_pending_cnt),
atomic_read(&device->unacked_cnt),
atomic_read(&device->ap_bio_cnt),
first_peer_device(device)->connection->epochs,
write_ordering_chars[first_peer_device(device)->connection->write_ordering]
);
seq_printf(seq, " oos:%llu\n",
Bit2KB((unsigned long long)
drbd_bm_total_weight(mdev)));
drbd_bm_total_weight(device)));
}
if (mdev->state.conn == C_SYNC_SOURCE ||
mdev->state.conn == C_SYNC_TARGET ||
mdev->state.conn == C_VERIFY_S ||
mdev->state.conn == C_VERIFY_T)
drbd_syncer_progress(mdev, seq);
if (device->state.conn == C_SYNC_SOURCE ||
device->state.conn == C_SYNC_TARGET ||
device->state.conn == C_VERIFY_S ||
device->state.conn == C_VERIFY_T)
drbd_syncer_progress(device, seq);
if (proc_details >= 1 && get_ldev_if_state(mdev, D_FAILED)) {
lc_seq_printf_stats(seq, mdev->resync);
lc_seq_printf_stats(seq, mdev->act_log);
put_ldev(mdev);
if (proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) {
lc_seq_printf_stats(seq, device->resync);
lc_seq_printf_stats(seq, device->act_log);
put_ldev(device);
}
if (proc_details >= 2) {
if (mdev->resync) {
lc_seq_dump_details(seq, mdev->resync, "rs_left",
if (device->resync) {
lc_seq_dump_details(seq, device->resync, "rs_left",
resync_dump_detail);
}
}
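
The remaining-time estimate in the hunk above,
rt = (dt * (rs_left / (db/100+1)))/100, divides before it multiplies so the
intermediate product cannot overflow an unsigned long even for very large
bitmaps.  A standalone illustration with made-up numbers:

	#include <stdio.h>

	int main(void)
	{
		unsigned long dt = 18;		/* seconds in the averaging window */
		unsigned long db = 4500;	/* bits that became in-sync in that window */
		unsigned long rs_left = 1000000; /* bits still out of sync */

		/* 1000000 / (45 + 1) = 21739; * 18 = 391302; / 100 = 3913 s */
		unsigned long rt = (dt * (rs_left / (db / 100 + 1))) / 100;

		printf("estimated finish in %lu seconds\n", rt);
		return 0;
	}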


@ -0,0 +1,295 @@
#ifndef __DRBD_PROTOCOL_H
#define __DRBD_PROTOCOL_H
enum drbd_packet {
/* receiver (data socket) */
P_DATA = 0x00,
P_DATA_REPLY = 0x01, /* Response to P_DATA_REQUEST */
P_RS_DATA_REPLY = 0x02, /* Response to P_RS_DATA_REQUEST */
P_BARRIER = 0x03,
P_BITMAP = 0x04,
P_BECOME_SYNC_TARGET = 0x05,
P_BECOME_SYNC_SOURCE = 0x06,
P_UNPLUG_REMOTE = 0x07, /* Used at various times to hint the peer */
P_DATA_REQUEST = 0x08, /* Used to ask for a data block */
P_RS_DATA_REQUEST = 0x09, /* Used to ask for a data block for resync */
P_SYNC_PARAM = 0x0a,
P_PROTOCOL = 0x0b,
P_UUIDS = 0x0c,
P_SIZES = 0x0d,
P_STATE = 0x0e,
P_SYNC_UUID = 0x0f,
P_AUTH_CHALLENGE = 0x10,
P_AUTH_RESPONSE = 0x11,
P_STATE_CHG_REQ = 0x12,
/* asender (meta socket) */
P_PING = 0x13,
P_PING_ACK = 0x14,
P_RECV_ACK = 0x15, /* Used in protocol B */
P_WRITE_ACK = 0x16, /* Used in protocol C */
P_RS_WRITE_ACK = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */
P_SUPERSEDED = 0x18, /* Used in proto C, two-primaries conflict detection */
P_NEG_ACK = 0x19, /* Sent if local disk is unusable */
P_NEG_DREPLY = 0x1a, /* Local disk is broken... */
P_NEG_RS_DREPLY = 0x1b, /* Local disk is broken... */
P_BARRIER_ACK = 0x1c,
P_STATE_CHG_REPLY = 0x1d,
/* "new" commands, no longer fitting into the ordering scheme above */
P_OV_REQUEST = 0x1e, /* data socket */
P_OV_REPLY = 0x1f,
P_OV_RESULT = 0x20, /* meta socket */
P_CSUM_RS_REQUEST = 0x21, /* data socket */
P_RS_IS_IN_SYNC = 0x22, /* meta socket */
P_SYNC_PARAM89 = 0x23, /* data socket, protocol version 89 replacement for P_SYNC_PARAM */
P_COMPRESSED_BITMAP = 0x24, /* compressed or otherwise encoded bitmap transfer */
/* P_CKPT_FENCE_REQ = 0x25, * currently reserved for protocol D */
/* P_CKPT_DISABLE_REQ = 0x26, * currently reserved for protocol D */
P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */
P_OUT_OF_SYNC = 0x28, /* Mark as out of sync (Outrunning), data socket */
P_RS_CANCEL = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */
P_CONN_ST_CHG_REQ = 0x2a, /* data sock: Connection wide state request */
P_CONN_ST_CHG_REPLY = 0x2b, /* meta sock: Connection side state req reply */
P_RETRY_WRITE = 0x2c, /* Protocol C: retry conflicting write request */
P_PROTOCOL_UPDATE = 0x2d, /* data sock: is used in established connections */
P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
P_MAX_OPT_CMD = 0x101,
/* special command ids for handshake */
P_INITIAL_META = 0xfff1, /* First Packet on the MetaSock */
P_INITIAL_DATA = 0xfff2, /* First Packet on the Socket */
P_CONNECTION_FEATURES = 0xfffe /* FIXED for the next century! */
};
#ifndef __packed
#define __packed __attribute__((packed))
#endif
/* This is the layout for a packet on the wire.
* The byteorder is the network byte order.
* (except block_id and barrier fields.
* these are pointers to local structs
* and have no relevance for the partner,
* which just echoes them as received.)
*
* NOTE that the payload starts at a long aligned offset,
* regardless of 32 or 64 bit arch!
*/
struct p_header80 {
u32 magic;
u16 command;
u16 length; /* bytes of data after this header */
} __packed;
/* Header for big packets, Used for data packets exceeding 64kB */
struct p_header95 {
u16 magic; /* use DRBD_MAGIC_BIG here */
u16 command;
u32 length;
} __packed;
struct p_header100 {
u32 magic;
u16 volume;
u16 command;
u32 length;
u32 pad;
} __packed;
/* these defines must not be changed without changing the protocol version */
#define DP_HARDBARRIER 1 /* deprecated */
#define DP_RW_SYNC 2 /* equals REQ_SYNC */
#define DP_MAY_SET_IN_SYNC 4
#define DP_UNPLUG 8 /* not used anymore */
#define DP_FUA 16 /* equals REQ_FUA */
#define DP_FLUSH 32 /* equals REQ_FLUSH */
#define DP_DISCARD 64 /* equals REQ_DISCARD */
#define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */
#define DP_SEND_WRITE_ACK 256 /* This is a proto C write request */
struct p_data {
u64 sector; /* 64 bits sector number */
u64 block_id; /* to identify the request in protocol B&C */
u32 seq_num;
u32 dp_flags;
} __packed;
/*
* commands which share a struct:
* p_block_ack:
* P_RECV_ACK (proto B), P_WRITE_ACK (proto C),
* P_SUPERSEDED (proto C, two-primaries conflict detection)
* p_block_req:
* P_DATA_REQUEST, P_RS_DATA_REQUEST
*/
struct p_block_ack {
u64 sector;
u64 block_id;
u32 blksize;
u32 seq_num;
} __packed;
struct p_block_req {
u64 sector;
u64 block_id;
u32 blksize;
u32 pad; /* to multiple of 8 Byte */
} __packed;
/*
* commands with their own struct for additional fields:
* P_CONNECTION_FEATURES
* P_BARRIER
* P_BARRIER_ACK
* P_SYNC_PARAM
* ReportParams
*/
struct p_connection_features {
u32 protocol_min;
u32 feature_flags;
u32 protocol_max;
/* should be more than enough for future enhancements
* for now, feature_flags and the reserved array shall be zero.
*/
u32 _pad;
u64 reserved[7];
} __packed;
struct p_barrier {
u32 barrier; /* barrier number _handle_ only */
u32 pad; /* to multiple of 8 Byte */
} __packed;
struct p_barrier_ack {
u32 barrier;
u32 set_size;
} __packed;
struct p_rs_param {
u32 resync_rate;
/* Since protocol version 88 and higher. */
char verify_alg[0];
} __packed;
struct p_rs_param_89 {
u32 resync_rate;
/* protocol version 89: */
char verify_alg[SHARED_SECRET_MAX];
char csums_alg[SHARED_SECRET_MAX];
} __packed;
struct p_rs_param_95 {
u32 resync_rate;
char verify_alg[SHARED_SECRET_MAX];
char csums_alg[SHARED_SECRET_MAX];
u32 c_plan_ahead;
u32 c_delay_target;
u32 c_fill_target;
u32 c_max_rate;
} __packed;
enum drbd_conn_flags {
CF_DISCARD_MY_DATA = 1,
CF_DRY_RUN = 2,
};
struct p_protocol {
u32 protocol;
u32 after_sb_0p;
u32 after_sb_1p;
u32 after_sb_2p;
u32 conn_flags;
u32 two_primaries;
/* Since protocol version 87 and higher. */
char integrity_alg[0];
} __packed;
struct p_uuids {
u64 uuid[UI_EXTENDED_SIZE];
} __packed;
struct p_rs_uuid {
u64 uuid;
} __packed;
struct p_sizes {
u64 d_size; /* size of disk */
u64 u_size; /* user requested size */
u64 c_size; /* current exported size */
u32 max_bio_size; /* Maximal size of a BIO */
u16 queue_order_type; /* not yet implemented in DRBD */
u16 dds_flags; /* use enum dds_flags here. */
} __packed;
struct p_state {
u32 state;
} __packed;
struct p_req_state {
u32 mask;
u32 val;
} __packed;
struct p_req_state_reply {
u32 retcode;
} __packed;
struct p_drbd06_param {
u64 size;
u32 state;
u32 blksize;
u32 protocol;
u32 version;
u32 gen_cnt[5];
u32 bit_map_gen[5];
} __packed;
struct p_block_desc {
u64 sector;
u32 blksize;
u32 pad; /* to multiple of 8 Byte */
} __packed;
/* Valid values for the encoding field.
* Bump proto version when changing this. */
enum drbd_bitmap_code {
/* RLE_VLI_Bytes = 0,
* and other bit variants had been defined during
* algorithm evaluation. */
RLE_VLI_Bits = 2,
};
struct p_compressed_bm {
/* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code
* (encoding & 0x80): polarity (set/unset) of first runlength
* ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits
* used to pad up to head.length bytes
*/
u8 encoding;
u8 code[0];
} __packed;
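For illustration only (these helpers are not in the patch), the three sub-fields described in the comment above can be pulled out of the encoding byte like this:

static inline unsigned int bm_encoding(u8 enc)  { return enc & 0x0f; }         /* enum drbd_bitmap_code */
static inline unsigned int bm_pad_bits(u8 enc)  { return (enc >> 4) & 0x07; }  /* trailing zero bits */
static inline int bm_first_set(u8 enc)          { return !!(enc & 0x80); }     /* polarity of the first runlength */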
struct p_delay_probe93 {
u32 seq_num; /* sequence number to match the two probe packets */
u32 offset; /* usecs the probe got sent after the reference time point */
} __packed;
/*
* Bitmap packets need to fit within a single page on the sender and receiver,
* so we are limited to 4 KiB (and not to PAGE_SIZE, which can be bigger).
*/
#define DRBD_SOCKET_BUFFER_SIZE 4096
#endif /* __DRBD_PROTOCOL_H */

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -275,17 +275,17 @@ struct bio_and_error {
int error;
};
extern void start_new_tl_epoch(struct drbd_tconn *tconn);
extern void start_new_tl_epoch(struct drbd_connection *connection);
extern void drbd_req_destroy(struct kref *kref);
extern void _req_may_be_done(struct drbd_request *req,
struct bio_and_error *m);
extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
struct bio_and_error *m);
extern void complete_master_bio(struct drbd_conf *mdev,
extern void complete_master_bio(struct drbd_device *device,
struct bio_and_error *m);
extern void request_timer_fn(unsigned long data);
extern void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what);
extern void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what);
extern void tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
extern void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
/* this is in drbd_main.c */
extern void drbd_restart_request(struct drbd_request *req);
@ -294,14 +294,14 @@ extern void drbd_restart_request(struct drbd_request *req);
* outside the spinlock, e.g. when walking some list on cleanup. */
static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
{
struct drbd_conf *mdev = req->w.mdev;
struct drbd_device *device = req->device;
struct bio_and_error m;
int rv;
/* __req_mod possibly frees req, do not touch req after that! */
rv = __req_mod(req, what, &m);
if (m.bio)
complete_master_bio(mdev, &m);
complete_master_bio(device, &m);
return rv;
}
@ -314,16 +314,16 @@ static inline int req_mod(struct drbd_request *req,
enum drbd_req_event what)
{
unsigned long flags;
struct drbd_conf *mdev = req->w.mdev;
struct drbd_device *device = req->device;
struct bio_and_error m;
int rv;
spin_lock_irqsave(&mdev->tconn->req_lock, flags);
spin_lock_irqsave(&device->resource->req_lock, flags);
rv = __req_mod(req, what, &m);
spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
spin_unlock_irqrestore(&device->resource->req_lock, flags);
if (m.bio)
complete_master_bio(mdev, &m);
complete_master_bio(device, &m);
return rv;
}

File diff suppressed because it is too large

@ -1,8 +1,8 @@
#ifndef DRBD_STATE_H
#define DRBD_STATE_H
struct drbd_conf;
struct drbd_tconn;
struct drbd_device;
struct drbd_connection;
/**
* DOC: DRBD State macros
@ -107,36 +107,36 @@ union drbd_dev_state {
unsigned int i;
};
extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev,
extern enum drbd_state_rv drbd_change_state(struct drbd_device *device,
enum chg_state_flags f,
union drbd_state mask,
union drbd_state val);
extern void drbd_force_state(struct drbd_conf *, union drbd_state,
extern void drbd_force_state(struct drbd_device *, union drbd_state,
union drbd_state);
extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *,
extern enum drbd_state_rv _drbd_request_state(struct drbd_device *,
union drbd_state,
union drbd_state,
enum chg_state_flags);
extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state,
extern enum drbd_state_rv __drbd_set_state(struct drbd_device *, union drbd_state,
enum chg_state_flags,
struct completion *done);
extern void print_st_err(struct drbd_conf *, union drbd_state,
extern void print_st_err(struct drbd_device *, union drbd_state,
union drbd_state, int);
enum drbd_state_rv
_conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
_conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val,
enum chg_state_flags flags);
enum drbd_state_rv
conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val,
enum chg_state_flags flags);
extern void drbd_resume_al(struct drbd_conf *mdev);
extern bool conn_all_vols_unconf(struct drbd_tconn *tconn);
extern void drbd_resume_al(struct drbd_device *device);
extern bool conn_all_vols_unconf(struct drbd_connection *connection);
/**
* drbd_request_state() - Request a state change
* @mdev: DRBD device.
* @device: DRBD device.
* @mask: mask of state bits to change.
* @val: value of new state bits.
*
@ -144,18 +144,18 @@ extern bool conn_all_vols_unconf(struct drbd_tconn *tconn);
* quite verbose in case the state change is not possible, and all those
* state changes are globally serialized.
*/
static inline int drbd_request_state(struct drbd_conf *mdev,
static inline int drbd_request_state(struct drbd_device *device,
union drbd_state mask,
union drbd_state val)
{
return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
return _drbd_request_state(device, mask, val, CS_VERBOSE + CS_ORDERED);
}
enum drbd_role conn_highest_role(struct drbd_tconn *tconn);
enum drbd_role conn_highest_peer(struct drbd_tconn *tconn);
enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn);
enum drbd_disk_state conn_lowest_disk(struct drbd_tconn *tconn);
enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn);
enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn);
enum drbd_role conn_highest_role(struct drbd_connection *connection);
enum drbd_role conn_highest_peer(struct drbd_connection *connection);
enum drbd_disk_state conn_highest_disk(struct drbd_connection *connection);
enum drbd_disk_state conn_lowest_disk(struct drbd_connection *connection);
enum drbd_disk_state conn_highest_pdsk(struct drbd_connection *connection);
enum drbd_conns conn_lowest_conn(struct drbd_connection *connection);
#endif
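As a hedged usage sketch (not taken from the patch), a caller asking the state machine to promote a device would build a mask/val pair covering only the role bits; R_MASK, R_PRIMARY and SS_SUCCESS come from &lt;linux/drbd.h&gt;, everything else here is illustrative:

/* Hypothetical caller, for illustration only. */
union drbd_state mask = { .i = 0 }, val = { .i = 0 };
int rv;

mask.role = R_MASK;		/* only the role bits may change */
val.role = R_PRIMARY;
rv = drbd_request_state(device, mask, val);
if (rv < SS_SUCCESS)
	pr_err("drbd: promotion refused: %s\n", drbd_set_st_err_str(rv));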


@ -24,6 +24,7 @@
*/
#include <linux/drbd.h>
#include "drbd_strings.h"
static const char *drbd_conn_s_names[] = {
[C_STANDALONE] = "StandAlone",


@ -0,0 +1,9 @@
#ifndef __DRBD_STRINGS_H
#define __DRBD_STRINGS_H
extern const char *drbd_conn_str(enum drbd_conns);
extern const char *drbd_role_str(enum drbd_role);
extern const char *drbd_disk_str(enum drbd_disk_state);
extern const char *drbd_set_st_err_str(enum drbd_state_rv);
#endif /* __DRBD_STRINGS_H */

File diff suppressed because it is too large

@ -9,12 +9,12 @@
extern char *drbd_sec_holder;
/* sets the number of 512 byte sectors of our virtual device */
static inline void drbd_set_my_capacity(struct drbd_conf *mdev,
static inline void drbd_set_my_capacity(struct drbd_device *device,
sector_t size)
{
/* set_capacity(mdev->this_bdev->bd_disk, size); */
set_capacity(mdev->vdisk, size);
mdev->this_bdev->bd_inode->i_size = (loff_t)size << 9;
/* set_capacity(device->this_bdev->bd_disk, size); */
set_capacity(device->vdisk, size);
device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
}
#define drbd_bio_uptodate(bio) bio_flagged(bio, BIO_UPTODATE)
@ -27,20 +27,20 @@ extern void drbd_request_endio(struct bio *bio, int error);
/*
* used to submit our private bio
*/
static inline void drbd_generic_make_request(struct drbd_conf *mdev,
static inline void drbd_generic_make_request(struct drbd_device *device,
int fault_type, struct bio *bio)
{
__release(local);
if (!bio->bi_bdev) {
printk(KERN_ERR "drbd%d: drbd_generic_make_request: "
"bio->bi_bdev == NULL\n",
mdev_to_minor(mdev));
device_to_minor(device));
dump_stack();
bio_endio(bio, -ENODEV);
return;
}
if (drbd_insert_fault(mdev, fault_type))
if (drbd_insert_fault(device, fault_type))
bio_endio(bio, -EIO);
else
generic_make_request(bio);


@ -252,38 +252,45 @@ static void mtip_async_complete(struct mtip_port *port,
void *data,
int status)
{
struct mtip_cmd *command;
struct mtip_cmd *cmd;
struct driver_data *dd = data;
int cb_status = status ? -EIO : 0;
int unaligned, cb_status = status ? -EIO : 0;
void (*func)(void *, int);
if (unlikely(!dd) || unlikely(!port))
return;
command = &port->commands[tag];
cmd = &port->commands[tag];
if (unlikely(status == PORT_IRQ_TF_ERR)) {
dev_warn(&port->dd->pdev->dev,
"Command tag %d failed due to TFE\n", tag);
}
/* Upper layer callback */
if (likely(command->async_callback))
command->async_callback(command->async_data, cb_status);
command->async_callback = NULL;
command->comp_func = NULL;
/* Unmap the DMA scatter list entries */
dma_unmap_sg(&dd->pdev->dev,
command->sg,
command->scatter_ents,
command->direction);
/* Clear the allocated and active bits for the command */
/* Clear the active flag */
atomic_set(&port->commands[tag].active, 0);
release_slot(port, tag);
up(&port->cmd_slot);
/* Upper layer callback */
func = cmd->async_callback;
if (likely(func && cmpxchg(&cmd->async_callback, func, 0) == func)) {
/* Unmap the DMA scatter list entries */
dma_unmap_sg(&dd->pdev->dev,
cmd->sg,
cmd->scatter_ents,
cmd->direction);
func(cmd->async_data, cb_status);
unaligned = cmd->unaligned;
/* Clear the allocated bit for the command */
release_slot(port, tag);
if (unlikely(unaligned))
up(&port->cmd_slot_unal);
else
up(&port->cmd_slot);
}
}
/*
@ -660,11 +667,12 @@ static void mtip_timeout_function(unsigned long int data)
{
struct mtip_port *port = (struct mtip_port *) data;
struct host_to_dev_fis *fis;
struct mtip_cmd *command;
int tag, cmdto_cnt = 0;
struct mtip_cmd *cmd;
int unaligned, tag, cmdto_cnt = 0;
unsigned int bit, group;
unsigned int num_command_slots;
unsigned long to, tagaccum[SLOTBITS_IN_LONGS];
void (*func)(void *, int);
if (unlikely(!port))
return;
@ -694,8 +702,8 @@ static void mtip_timeout_function(unsigned long int data)
group = tag >> 5;
bit = tag & 0x1F;
command = &port->commands[tag];
fis = (struct host_to_dev_fis *) command->command;
cmd = &port->commands[tag];
fis = (struct host_to_dev_fis *) cmd->command;
set_bit(tag, tagaccum);
cmdto_cnt++;
@ -709,27 +717,30 @@ static void mtip_timeout_function(unsigned long int data)
*/
writel(1 << bit, port->completed[group]);
/* Call the async completion callback. */
if (likely(command->async_callback))
command->async_callback(command->async_data,
-EIO);
command->async_callback = NULL;
command->comp_func = NULL;
/* Unmap the DMA scatter list entries */
dma_unmap_sg(&port->dd->pdev->dev,
command->sg,
command->scatter_ents,
command->direction);
/*
* Clear the allocated bit and active tag for the
* command.
*/
/* Clear the active flag for the command */
atomic_set(&port->commands[tag].active, 0);
release_slot(port, tag);
up(&port->cmd_slot);
func = cmd->async_callback;
if (func &&
cmpxchg(&cmd->async_callback, func, 0) == func) {
/* Unmap the DMA scatter list entries */
dma_unmap_sg(&port->dd->pdev->dev,
cmd->sg,
cmd->scatter_ents,
cmd->direction);
func(cmd->async_data, -EIO);
unaligned = cmd->unaligned;
/* Clear the allocated bit for the command. */
release_slot(port, tag);
if (unaligned)
up(&port->cmd_slot_unal);
else
up(&port->cmd_slot);
}
}
}
@ -4213,6 +4224,7 @@ skip_create_disk:
blk_queue_max_hw_sectors(dd->queue, 0xffff);
blk_queue_max_segment_size(dd->queue, 0x400000);
blk_queue_io_min(dd->queue, 4096);
blk_queue_bounce_limit(dd->queue, dd->pdev->dma_mask);
/*
* write back cache is not supported in the device. FUA depends on
@ -4615,7 +4627,7 @@ static int mtip_pci_probe(struct pci_dev *pdev,
if (rv) {
dev_warn(&pdev->dev,
"Unable to enable MSI interrupt.\n");
goto block_initialize_err;
goto msi_initialize_err;
}
/* Initialize the block layer. */
@ -4645,6 +4657,8 @@ static int mtip_pci_probe(struct pci_dev *pdev,
block_initialize_err:
pci_disable_msi(pdev);
msi_initialize_err:
if (dd->isr_workq) {
flush_workqueue(dd->isr_workq);
destroy_workqueue(dd->isr_workq);
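The mtip32xx completion rework above hinges on claiming the per-command callback pointer exactly once, so the normal completion path and the timeout path can never both complete the same command. A generic sketch of that claim-once idiom (the surrounding names are illustrative):

/* Whoever wins the cmpxchg owns the completion; the loser sees NULL
 * (or a different pointer) and backs off. */
void (*cb)(void *, int) = cmd->async_callback;

if (cb && cmpxchg(&cmd->async_callback, cb, NULL) == cb) {
	cb(cmd->async_data, status);
	/* ...unmap DMA, release the tag, up() the matching semaphore... */
}
/* else: the other path already completed this command -- do nothing */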


@ -92,7 +92,7 @@
/* Driver name and version strings */
#define MTIP_DRV_NAME "mtip32xx"
#define MTIP_DRV_VERSION "1.3.0"
#define MTIP_DRV_VERSION "1.3.1"
/* Maximum number of minor device numbers per device. */
#define MTIP_MAX_MINORS 16


@ -1836,31 +1836,16 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
/* Deregister the admin queue's interrupt */
free_irq(dev->entry[0].vector, adminq);
vecs = nr_io_queues;
for (i = 0; i < vecs; i++)
for (i = 0; i < nr_io_queues; i++)
dev->entry[i].entry = i;
for (;;) {
result = pci_enable_msix(pdev, dev->entry, vecs);
if (result <= 0)
break;
vecs = result;
}
if (result < 0) {
vecs = nr_io_queues;
if (vecs > 32)
vecs = 32;
for (;;) {
result = pci_enable_msi_block(pdev, vecs);
if (result == 0) {
for (i = 0; i < vecs; i++)
dev->entry[i].vector = i + pdev->irq;
break;
} else if (result < 0) {
vecs = 1;
break;
}
vecs = result;
vecs = pci_enable_msix_range(pdev, dev->entry, 1, nr_io_queues);
if (vecs < 0) {
vecs = pci_enable_msi_range(pdev, 1, min(nr_io_queues, 32));
if (vecs < 0) {
vecs = 1;
} else {
for (i = 0; i < vecs; i++)
dev->entry[i].vector = i + pdev->irq;
}
}
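Both this hunk and the skd changes below lean on the pci_enable_msix_range()/pci_enable_msi_range() helpers (merged around 3.14): they allocate between minvec and maxvec vectors and return the count actually granted, or a negative errno, which replaces the old retry loops around pci_enable_msix(). A minimal, hedged usage sketch (pdev, entries and want are placeholders):

/* Illustration only -- filling in the msix_entry array and wiring up the
 * vectors is still the caller's job, as in the driver code above. */
int nvec = pci_enable_msix_range(pdev, entries, 1, want);
if (nvec < 0) {
	/* MSI-X unavailable; try plain MSI with 1..want vectors */
	nvec = pci_enable_msi_range(pdev, 1, want);
	if (nvec < 0)
		nvec = 0;	/* fall back to legacy INTx */
}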


@ -3910,18 +3910,22 @@ static void skd_release_msix(struct skd_device *skdev)
struct skd_msix_entry *qentry;
int i;
if (skdev->msix_entries == NULL)
return;
for (i = 0; i < skdev->msix_count; i++) {
qentry = &skdev->msix_entries[i];
skdev = qentry->rsp;
if (skdev->msix_entries) {
for (i = 0; i < skdev->msix_count; i++) {
qentry = &skdev->msix_entries[i];
skdev = qentry->rsp;
if (qentry->have_irq)
devm_free_irq(&skdev->pdev->dev,
qentry->vector, qentry->rsp);
if (qentry->have_irq)
devm_free_irq(&skdev->pdev->dev,
qentry->vector, qentry->rsp);
}
kfree(skdev->msix_entries);
}
pci_disable_msix(skdev->pdev);
kfree(skdev->msix_entries);
if (skdev->msix_count)
pci_disable_msix(skdev->pdev);
skdev->msix_count = 0;
skdev->msix_entries = NULL;
}
@ -3929,12 +3933,10 @@ static void skd_release_msix(struct skd_device *skdev)
static int skd_acquire_msix(struct skd_device *skdev)
{
int i, rc;
struct pci_dev *pdev;
struct msix_entry *entries = NULL;
struct pci_dev *pdev = skdev->pdev;
struct msix_entry *entries;
struct skd_msix_entry *qentry;
pdev = skdev->pdev;
skdev->msix_count = SKD_MAX_MSIX_COUNT;
entries = kzalloc(sizeof(struct msix_entry) * SKD_MAX_MSIX_COUNT,
GFP_KERNEL);
if (!entries)
@ -3943,40 +3945,26 @@ static int skd_acquire_msix(struct skd_device *skdev)
for (i = 0; i < SKD_MAX_MSIX_COUNT; i++)
entries[i].entry = i;
rc = pci_enable_msix(pdev, entries, SKD_MAX_MSIX_COUNT);
if (rc < 0)
rc = pci_enable_msix_range(pdev, entries,
SKD_MIN_MSIX_COUNT, SKD_MAX_MSIX_COUNT);
if (rc < 0) {
pr_err("(%s): failed to enable MSI-X %d\n",
skd_name(skdev), rc);
goto msix_out;
if (rc) {
if (rc < SKD_MIN_MSIX_COUNT) {
pr_err("(%s): failed to enable MSI-X %d\n",
skd_name(skdev), rc);
goto msix_out;
}
pr_debug("%s:%s:%d %s: <%s> allocated %d MSI-X vectors\n",
skdev->name, __func__, __LINE__,
pci_name(pdev), skdev->name, rc);
skdev->msix_count = rc;
rc = pci_enable_msix(pdev, entries, skdev->msix_count);
if (rc) {
pr_err("(%s): failed to enable MSI-X "
"support (%d) %d\n",
skd_name(skdev), skdev->msix_count, rc);
goto msix_out;
}
}
skdev->msix_count = rc;
skdev->msix_entries = kzalloc(sizeof(struct skd_msix_entry) *
skdev->msix_count, GFP_KERNEL);
if (!skdev->msix_entries) {
rc = -ENOMEM;
skdev->msix_count = 0;
pr_err("(%s): msix table allocation error\n",
skd_name(skdev));
goto msix_out;
}
qentry = skdev->msix_entries;
for (i = 0; i < skdev->msix_count; i++) {
qentry = &skdev->msix_entries[i];
qentry->vector = entries[i].vector;
qentry->entry = entries[i].entry;
qentry->rsp = NULL;
@ -3985,11 +3973,10 @@ static int skd_acquire_msix(struct skd_device *skdev)
skdev->name, __func__, __LINE__,
pci_name(pdev), skdev->name,
i, qentry->vector, qentry->entry);
qentry++;
}
/* Enable MSI-X vectors for the base queue */
for (i = 0; i < SKD_MAX_MSIX_COUNT; i++) {
for (i = 0; i < skdev->msix_count; i++) {
qentry = &skdev->msix_entries[i];
snprintf(qentry->isr_name, sizeof(qentry->isr_name),
"%s%d-msix %s", DRV_NAME, skdev->devno,
@ -4045,8 +4032,8 @@ RETRY_IRQ_TYPE:
case SKD_IRQ_MSI:
snprintf(skdev->isr_name, sizeof(skdev->isr_name), "%s%d-msi",
DRV_NAME, skdev->devno);
rc = pci_enable_msi(pdev);
if (!rc) {
rc = pci_enable_msi_range(pdev, 1, 1);
if (rc > 0) {
rc = devm_request_irq(&pdev->dev, pdev->irq, skd_isr, 0,
skdev->isr_name, skdev);
if (rc) {


@ -30,6 +30,7 @@
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/wait.h>
#include <asm/io.h>
#include <asm/dbdma.h>
#include <asm/prom.h>
@ -840,14 +841,17 @@ static int grab_drive(struct floppy_state *fs, enum swim_state state,
spin_lock_irqsave(&swim3_lock, flags);
if (fs->state != idle && fs->state != available) {
++fs->wanted;
while (fs->state != available) {
/* this will enable irqs in order to sleep */
if (!interruptible)
wait_event_lock_irq(fs->wait,
fs->state == available,
swim3_lock);
else if (wait_event_interruptible_lock_irq(fs->wait,
fs->state == available,
swim3_lock)) {
--fs->wanted;
spin_unlock_irqrestore(&swim3_lock, flags);
if (interruptible && signal_pending(current)) {
--fs->wanted;
return -EINTR;
}
interruptible_sleep_on(&fs->wait);
spin_lock_irqsave(&swim3_lock, flags);
return -EINTR;
}
--fs->wanted;
}
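The swim3 change replaces the long-deprecated, racy interruptible_sleep_on() with the wait_event_*_lock_irq() helpers from &lt;linux/wait.h&gt;, which drop the given spinlock (enabling interrupts) while sleeping and hold it again whenever the condition is rechecked or the macro returns. Restated in isolation as a sketch:

/* Hypothetical waiter, mirroring the pattern above. */
spin_lock_irqsave(&swim3_lock, flags);
if (wait_event_interruptible_lock_irq(fs->wait,
				      fs->state == available,
				      swim3_lock)) {
	/* interrupted by a signal; swim3_lock is held again at this point */
	spin_unlock_irqrestore(&swim3_lock, flags);
	return -EINTR;
}
/* condition is true and swim3_lock is still held here */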


@ -24,11 +24,3 @@ config BCACHE_CLOSURES_DEBUG
Keeps all active closures in a linked list and provides a debugfs
interface to list them, which makes it possible to see asynchronous
operations that get stuck.
# cgroup code needs to be updated:
#
#config CGROUP_BCACHE
# bool "Cgroup controls for bcache"
# depends on BCACHE && BLK_CGROUP
# ---help---
# TODO


@ -78,12 +78,6 @@ uint8_t bch_inc_gen(struct cache *ca, struct bucket *b)
ca->set->need_gc = max(ca->set->need_gc, bucket_gc_gen(b));
WARN_ON_ONCE(ca->set->need_gc > BUCKET_GC_GEN_MAX);
if (CACHE_SYNC(&ca->set->sb)) {
ca->need_save_prio = max(ca->need_save_prio,
bucket_disk_gen(b));
WARN_ON_ONCE(ca->need_save_prio > BUCKET_DISK_GEN_MAX);
}
return ret;
}
@ -120,51 +114,45 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
mutex_unlock(&c->bucket_lock);
}
/* Allocation */
/*
* Background allocation thread: scans for buckets to be invalidated,
* invalidates them, rewrites prios/gens (marking them as invalidated on disk),
* then optionally issues discard commands to the newly free buckets, then puts
* them on the various freelists.
*/
static inline bool can_inc_bucket_gen(struct bucket *b)
{
return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX &&
bucket_disk_gen(b) < BUCKET_DISK_GEN_MAX;
return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX;
}
bool bch_bucket_add_unused(struct cache *ca, struct bucket *b)
bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b)
{
BUG_ON(GC_MARK(b) || GC_SECTORS_USED(b));
BUG_ON(!ca->set->gc_mark_valid);
if (CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) {
unsigned i;
for (i = 0; i < RESERVE_NONE; i++)
if (!fifo_full(&ca->free[i]))
goto add;
return false;
}
add:
b->prio = 0;
if (can_inc_bucket_gen(b) &&
fifo_push(&ca->unused, b - ca->buckets)) {
atomic_inc(&b->pin);
return true;
}
return false;
}
static bool can_invalidate_bucket(struct cache *ca, struct bucket *b)
{
return GC_MARK(b) == GC_MARK_RECLAIMABLE &&
return (!GC_MARK(b) ||
GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
!atomic_read(&b->pin) &&
can_inc_bucket_gen(b);
}
static void invalidate_one_bucket(struct cache *ca, struct bucket *b)
void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
{
lockdep_assert_held(&ca->set->bucket_lock);
BUG_ON(GC_MARK(b) && GC_MARK(b) != GC_MARK_RECLAIMABLE);
if (GC_SECTORS_USED(b))
trace_bcache_invalidate(ca, b - ca->buckets);
bch_inc_gen(ca, b);
b->prio = INITIAL_PRIO;
atomic_inc(&b->pin);
}
static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
{
__bch_invalidate_one_bucket(ca, b);
fifo_push(&ca->free_inc, b - ca->buckets);
}
@ -195,20 +183,7 @@ static void invalidate_buckets_lru(struct cache *ca)
ca->heap.used = 0;
for_each_bucket(b, ca) {
/*
* If we fill up the unused list, if we then return before
* adding anything to the free_inc list we'll skip writing
* prios/gens and just go back to allocating from the unused
* list:
*/
if (fifo_full(&ca->unused))
return;
if (!can_invalidate_bucket(ca, b))
continue;
if (!GC_SECTORS_USED(b) &&
bch_bucket_add_unused(ca, b))
if (!bch_can_invalidate_bucket(ca, b))
continue;
if (!heap_full(&ca->heap))
@ -233,7 +208,7 @@ static void invalidate_buckets_lru(struct cache *ca)
return;
}
invalidate_one_bucket(ca, b);
bch_invalidate_one_bucket(ca, b);
}
}
@ -249,8 +224,8 @@ static void invalidate_buckets_fifo(struct cache *ca)
b = ca->buckets + ca->fifo_last_bucket++;
if (can_invalidate_bucket(ca, b))
invalidate_one_bucket(ca, b);
if (bch_can_invalidate_bucket(ca, b))
bch_invalidate_one_bucket(ca, b);
if (++checked >= ca->sb.nbuckets) {
ca->invalidate_needs_gc = 1;
@ -274,8 +249,8 @@ static void invalidate_buckets_random(struct cache *ca)
b = ca->buckets + n;
if (can_invalidate_bucket(ca, b))
invalidate_one_bucket(ca, b);
if (bch_can_invalidate_bucket(ca, b))
bch_invalidate_one_bucket(ca, b);
if (++checked >= ca->sb.nbuckets / 2) {
ca->invalidate_needs_gc = 1;
@ -287,8 +262,7 @@ static void invalidate_buckets_random(struct cache *ca)
static void invalidate_buckets(struct cache *ca)
{
if (ca->invalidate_needs_gc)
return;
BUG_ON(ca->invalidate_needs_gc);
switch (CACHE_REPLACEMENT(&ca->sb)) {
case CACHE_REPLACEMENT_LRU:
@ -301,8 +275,6 @@ static void invalidate_buckets(struct cache *ca)
invalidate_buckets_random(ca);
break;
}
trace_bcache_alloc_invalidate(ca);
}
#define allocator_wait(ca, cond) \
@ -350,17 +322,10 @@ static int bch_allocator_thread(void *arg)
* possibly issue discards to them, then we add the bucket to
* the free list:
*/
while (1) {
while (!fifo_empty(&ca->free_inc)) {
long bucket;
if ((!atomic_read(&ca->set->prio_blocked) ||
!CACHE_SYNC(&ca->set->sb)) &&
!fifo_empty(&ca->unused))
fifo_pop(&ca->unused, bucket);
else if (!fifo_empty(&ca->free_inc))
fifo_pop(&ca->free_inc, bucket);
else
break;
fifo_pop(&ca->free_inc, bucket);
if (ca->discard) {
mutex_unlock(&ca->set->bucket_lock);
@ -371,6 +336,7 @@ static int bch_allocator_thread(void *arg)
}
allocator_wait(ca, bch_allocator_push(ca, bucket));
wake_up(&ca->set->btree_cache_wait);
wake_up(&ca->set->bucket_wait);
}
@ -380,9 +346,9 @@ static int bch_allocator_thread(void *arg)
* them to the free_inc list:
*/
retry_invalidate:
allocator_wait(ca, ca->set->gc_mark_valid &&
(ca->need_save_prio > 64 ||
!ca->invalidate_needs_gc));
!ca->invalidate_needs_gc);
invalidate_buckets(ca);
/*
@ -390,13 +356,28 @@ static int bch_allocator_thread(void *arg)
* new stuff to them:
*/
allocator_wait(ca, !atomic_read(&ca->set->prio_blocked));
if (CACHE_SYNC(&ca->set->sb) &&
(!fifo_empty(&ca->free_inc) ||
ca->need_save_prio > 64))
if (CACHE_SYNC(&ca->set->sb)) {
/*
* This could deadlock if an allocation with a btree
* node locked ever blocked - having the btree node
* locked would block garbage collection, but here we're
* waiting on garbage collection before we invalidate
* and free anything.
*
* But this should be safe since the btree code always
* uses btree_check_reserve() before allocating now, and
* if it fails it blocks without btree nodes locked.
*/
if (!fifo_full(&ca->free_inc))
goto retry_invalidate;
bch_prio_write(ca);
}
}
}
/* Allocation */
long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
{
DEFINE_WAIT(w);
@ -408,8 +389,10 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
fifo_pop(&ca->free[reserve], r))
goto out;
if (!wait)
if (!wait) {
trace_bcache_alloc_fail(ca, reserve);
return -1;
}
do {
prepare_to_wait(&ca->set->bucket_wait, &w,
@ -425,6 +408,8 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
out:
wake_up_process(ca->alloc_thread);
trace_bcache_alloc(ca, reserve);
if (expensive_debug_checks(ca->set)) {
size_t iter;
long i;
@ -438,8 +423,6 @@ out:
BUG_ON(i == r);
fifo_for_each(i, &ca->free_inc, iter)
BUG_ON(i == r);
fifo_for_each(i, &ca->unused, iter)
BUG_ON(i == r);
}
b = ca->buckets + r;
@ -461,17 +444,19 @@ out:
return r;
}
void __bch_bucket_free(struct cache *ca, struct bucket *b)
{
SET_GC_MARK(b, 0);
SET_GC_SECTORS_USED(b, 0);
}
void bch_bucket_free(struct cache_set *c, struct bkey *k)
{
unsigned i;
for (i = 0; i < KEY_PTRS(k); i++) {
struct bucket *b = PTR_BUCKET(c, k, i);
SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
SET_GC_SECTORS_USED(b, 0);
bch_bucket_add_unused(PTR_CACHE(c, k, i), b);
}
for (i = 0; i < KEY_PTRS(k); i++)
__bch_bucket_free(PTR_CACHE(c, k, i),
PTR_BUCKET(c, k, i));
}
int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
@ -709,25 +694,3 @@ int bch_cache_allocator_start(struct cache *ca)
ca->alloc_thread = k;
return 0;
}
int bch_cache_allocator_init(struct cache *ca)
{
/*
* Reserve:
* Prio/gen writes first
* Then 8 for btree allocations
* Then half for the moving garbage collector
*/
#if 0
ca->watermark[WATERMARK_PRIO] = 0;
ca->watermark[WATERMARK_METADATA] = prio_buckets(ca);
ca->watermark[WATERMARK_MOVINGGC] = 8 +
ca->watermark[WATERMARK_METADATA];
ca->watermark[WATERMARK_NONE] = ca->free.size / 2 +
ca->watermark[WATERMARK_MOVINGGC];
#endif
return 0;
}
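As a hedged usage sketch (not from the patch), a caller pulling a bucket from one of the reserves shown above would treat the return value of bch_bucket_alloc() as a bucket index, with -1 meaning the reserve was empty and wait was false:

/* Hypothetical caller, for illustration only. */
long r = bch_bucket_alloc(ca, RESERVE_MOVINGGC, false);	/* don't block */
if (r < 0)
	return;			/* reserve empty and wait == false */
/* r indexes ca->buckets; building a bkey that points at the new bucket is
 * up to the caller (cf. __bch_bucket_alloc_set() above). */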


@ -195,9 +195,7 @@ struct bucket {
atomic_t pin;
uint16_t prio;
uint8_t gen;
uint8_t disk_gen;
uint8_t last_gc; /* Most out of date gen in the btree */
uint8_t gc_gen;
uint16_t gc_mark; /* Bitfield used by GC. See below for field */
};
@ -207,9 +205,9 @@ struct bucket {
*/
BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2);
#define GC_MARK_RECLAIMABLE 0
#define GC_MARK_DIRTY 1
#define GC_MARK_METADATA 2
#define GC_MARK_RECLAIMABLE 1
#define GC_MARK_DIRTY 2
#define GC_MARK_METADATA 3
#define GC_SECTORS_USED_SIZE 13
#define MAX_GC_SECTORS_USED (~(~0ULL << GC_SECTORS_USED_SIZE))
BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE);
@ -426,14 +424,9 @@ struct cache {
* their new gen to disk. After prio_write() finishes writing the new
* gens/prios, they'll be moved to the free list (and possibly discarded
* in the process)
*
* unused: GC found nothing pointing into these buckets (possibly
* because all the data they contained was overwritten), so we only
* need to discard them before they can be moved to the free list.
*/
DECLARE_FIFO(long, free)[RESERVE_NR];
DECLARE_FIFO(long, free_inc);
DECLARE_FIFO(long, unused);
size_t fifo_last_bucket;
@ -442,12 +435,6 @@ struct cache {
DECLARE_HEAP(struct bucket *, heap);
/*
* max(gen - disk_gen) for all buckets. When it gets too big we have to
* call prio_write() to keep gens from wrapping.
*/
uint8_t need_save_prio;
/*
* If nonzero, we know we aren't going to find any buckets to invalidate
* until a gc finishes - otherwise we could pointlessly burn a ton of
@ -562,19 +549,16 @@ struct cache_set {
struct list_head btree_cache_freed;
/* Number of elements in btree_cache + btree_cache_freeable lists */
unsigned bucket_cache_used;
unsigned btree_cache_used;
/*
* If we need to allocate memory for a new btree node and that
* allocation fails, we can cannibalize another node in the btree cache
* to satisfy the allocation. However, only one thread can be doing this
* at a time, for obvious reasons - try_harder and try_wait are
* basically a lock for this that we can wait on asynchronously. The
* btree_root() macro releases the lock when it returns.
* to satisfy the allocation - lock to guarantee only one thread does
* this at a time:
*/
struct task_struct *try_harder;
wait_queue_head_t try_wait;
uint64_t try_harder_start;
wait_queue_head_t btree_cache_wait;
struct task_struct *btree_cache_alloc_lock;
/*
* When we free a btree node, we increment the gen of the bucket the
@ -603,7 +587,7 @@ struct cache_set {
uint16_t min_prio;
/*
* max(gen - gc_gen) for all buckets. When it gets too big we have to gc
* max(gen - last_gc) for all buckets. When it gets too big we have to gc
* to keep gens from wrapping around.
*/
uint8_t need_gc;
@ -628,6 +612,8 @@ struct cache_set {
/* Number of moving GC bios in flight */
struct semaphore moving_in_flight;
struct workqueue_struct *moving_gc_wq;
struct btree *root;
#ifdef CONFIG_BCACHE_DEBUG
@ -667,7 +653,6 @@ struct cache_set {
struct time_stats btree_gc_time;
struct time_stats btree_split_time;
struct time_stats btree_read_time;
struct time_stats try_harder_time;
atomic_long_t cache_read_races;
atomic_long_t writeback_keys_done;
@ -850,9 +835,6 @@ static inline bool cached_dev_get(struct cached_dev *dc)
/*
* bucket_gc_gen() returns the difference between the bucket's current gen and
* the oldest gen of any pointer into that bucket in the btree (last_gc).
*
* bucket_disk_gen() returns the difference between the current gen and the gen
* on disk; they're both used to make sure gens don't wrap around.
*/
static inline uint8_t bucket_gc_gen(struct bucket *b)
@ -860,13 +842,7 @@ static inline uint8_t bucket_gc_gen(struct bucket *b)
return b->gen - b->last_gc;
}
static inline uint8_t bucket_disk_gen(struct bucket *b)
{
return b->gen - b->disk_gen;
}
#define BUCKET_GC_GEN_MAX 96U
#define BUCKET_DISK_GEN_MAX 64U
#define kobj_attribute_write(n, fn) \
static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn)
@ -899,11 +875,14 @@ void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);
uint8_t bch_inc_gen(struct cache *, struct bucket *);
void bch_rescale_priorities(struct cache_set *, int);
bool bch_bucket_add_unused(struct cache *, struct bucket *);
long bch_bucket_alloc(struct cache *, unsigned, bool);
bool bch_can_invalidate_bucket(struct cache *, struct bucket *);
void __bch_invalidate_one_bucket(struct cache *, struct bucket *);
void __bch_bucket_free(struct cache *, struct bucket *);
void bch_bucket_free(struct cache_set *, struct bkey *);
long bch_bucket_alloc(struct cache *, unsigned, bool);
int __bch_bucket_alloc_set(struct cache_set *, unsigned,
struct bkey *, int, bool);
int bch_bucket_alloc_set(struct cache_set *, unsigned,
@ -954,13 +933,10 @@ int bch_open_buckets_alloc(struct cache_set *);
void bch_open_buckets_free(struct cache_set *);
int bch_cache_allocator_start(struct cache *ca);
int bch_cache_allocator_init(struct cache *ca);
void bch_debug_exit(void);
int bch_debug_init(struct kobject *);
void bch_request_exit(void);
int bch_request_init(void);
void bch_btree_exit(void);
int bch_btree_init(void);
#endif /* _BCACHE_H */


@ -23,8 +23,8 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set)
for (k = i->start; k < bset_bkey_last(i); k = next) {
next = bkey_next(k);
printk(KERN_ERR "block %u key %li/%u: ", set,
(uint64_t *) k - i->d, i->keys);
printk(KERN_ERR "block %u key %u/%u: ", set,
(unsigned) ((u64 *) k - i->d), i->keys);
if (b->ops->key_dump)
b->ops->key_dump(b, k);


@ -478,6 +478,12 @@ static inline void bch_keylist_init(struct keylist *l)
l->top_p = l->keys_p = l->inline_keys;
}
static inline void bch_keylist_init_single(struct keylist *l, struct bkey *k)
{
l->keys = k;
l->top = bkey_next(k);
}
static inline void bch_keylist_push(struct keylist *l)
{
l->top = bkey_next(l->top);

File diff suppressed because it is too large

@ -127,6 +127,8 @@ struct btree {
struct cache_set *c;
struct btree *parent;
struct mutex write_lock;
unsigned long flags;
uint16_t written; /* would be nice to kill */
uint8_t level;
@ -236,11 +238,13 @@ static inline void rw_unlock(bool w, struct btree *b)
}
void bch_btree_node_read_done(struct btree *);
void __bch_btree_node_write(struct btree *, struct closure *);
void bch_btree_node_write(struct btree *, struct closure *);
void bch_btree_set_root(struct btree *);
struct btree *bch_btree_node_alloc(struct cache_set *, int, bool);
struct btree *bch_btree_node_get(struct cache_set *, struct bkey *, int, bool);
struct btree *bch_btree_node_alloc(struct cache_set *, struct btree_op *, int);
struct btree *bch_btree_node_get(struct cache_set *, struct btree_op *,
struct bkey *, int, bool);
int bch_btree_insert_check_key(struct btree *, struct btree_op *,
struct bkey *);
@ -248,10 +252,10 @@ int bch_btree_insert(struct cache_set *, struct keylist *,
atomic_t *, struct bkey *);
int bch_gc_thread_start(struct cache_set *);
size_t bch_btree_gc_finish(struct cache_set *);
void bch_initial_gc_finish(struct cache_set *);
void bch_moving_gc(struct cache_set *);
int bch_btree_check(struct cache_set *);
uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *);
void bch_initial_mark_key(struct cache_set *, int, struct bkey *);
static inline void wake_up_gc(struct cache_set *c)
{


@ -194,9 +194,9 @@ err:
mutex_unlock(&b->c->bucket_lock);
bch_extent_to_text(buf, sizeof(buf), k);
btree_bug(b,
"inconsistent btree pointer %s: bucket %zi pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
"inconsistent btree pointer %s: bucket %zi pin %i prio %i gen %i last_gc %i mark %llu",
buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
g->prio, g->gen, g->last_gc, GC_MARK(g));
return true;
}
@ -308,6 +308,16 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
return NULL;
}
static void bch_subtract_dirty(struct bkey *k,
struct cache_set *c,
uint64_t offset,
int sectors)
{
if (KEY_DIRTY(k))
bcache_dev_sectors_dirty_add(c, KEY_INODE(k),
offset, -sectors);
}
static bool bch_extent_insert_fixup(struct btree_keys *b,
struct bkey *insert,
struct btree_iter *iter,
@ -315,13 +325,6 @@ static bool bch_extent_insert_fixup(struct btree_keys *b,
{
struct cache_set *c = container_of(b, struct btree, keys)->c;
void subtract_dirty(struct bkey *k, uint64_t offset, int sectors)
{
if (KEY_DIRTY(k))
bcache_dev_sectors_dirty_add(c, KEY_INODE(k),
offset, -sectors);
}
uint64_t old_offset;
unsigned old_size, sectors_found = 0;
@ -398,7 +401,8 @@ static bool bch_extent_insert_fixup(struct btree_keys *b,
struct bkey *top;
subtract_dirty(k, KEY_START(insert), KEY_SIZE(insert));
bch_subtract_dirty(k, c, KEY_START(insert),
KEY_SIZE(insert));
if (bkey_written(b, k)) {
/*
@ -448,7 +452,7 @@ static bool bch_extent_insert_fixup(struct btree_keys *b,
}
}
subtract_dirty(k, old_offset, old_size - KEY_SIZE(k));
bch_subtract_dirty(k, c, old_offset, old_size - KEY_SIZE(k));
}
check_failed:
@ -499,9 +503,9 @@ static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
if (mutex_trylock(&b->c->bucket_lock)) {
if (b->c->gc_mark_valid &&
((GC_MARK(g) != GC_MARK_DIRTY &&
KEY_DIRTY(k)) ||
GC_MARK(g) == GC_MARK_METADATA))
(!GC_MARK(g) ||
GC_MARK(g) == GC_MARK_METADATA ||
(GC_MARK(g) != GC_MARK_DIRTY && KEY_DIRTY(k))))
goto err;
if (g->prio == BTREE_PRIO)
@ -515,9 +519,9 @@ err:
mutex_unlock(&b->c->bucket_lock);
bch_extent_to_text(buf, sizeof(buf), k);
btree_bug(b,
"inconsistent extent pointer %s:\nbucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
"inconsistent extent pointer %s:\nbucket %zu pin %i prio %i gen %i last_gc %i mark %llu",
buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
g->prio, g->gen, g->last_gc, GC_MARK(g));
return true;
}


@ -237,8 +237,14 @@ bsearch:
for (i = 0; i < ca->sb.njournal_buckets; i++)
if (ja->seq[i] > seq) {
seq = ja->seq[i];
ja->cur_idx = ja->discard_idx =
ja->last_idx = i;
/*
* When journal_reclaim() goes to allocate for
* the first time, it'll use the bucket after
* ja->cur_idx
*/
ja->cur_idx = i;
ja->last_idx = ja->discard_idx = (i + 1) %
ca->sb.njournal_buckets;
}
}
@ -288,16 +294,11 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
k = bkey_next(k)) {
unsigned j;
for (j = 0; j < KEY_PTRS(k); j++) {
struct bucket *g = PTR_BUCKET(c, k, j);
atomic_inc(&g->pin);
for (j = 0; j < KEY_PTRS(k); j++)
if (ptr_available(c, k, j))
atomic_inc(&PTR_BUCKET(c, k, j)->pin);
if (g->prio == BTREE_PRIO &&
!ptr_stale(c, k, j))
g->prio = INITIAL_PRIO;
}
__bch_btree_mark_key(c, 0, k);
bch_initial_mark_key(c, 0, k);
}
}
}
@ -312,8 +313,6 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
uint64_t start = i->j.last_seq, end = i->j.seq, n = start;
struct keylist keylist;
bch_keylist_init(&keylist);
list_for_each_entry(i, list, list) {
BUG_ON(i->pin && atomic_read(i->pin) != 1);
@ -326,8 +325,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
k = bkey_next(k)) {
trace_bcache_journal_replay_key(k);
bkey_copy(keylist.top, k);
bch_keylist_push(&keylist);
bch_keylist_init_single(&keylist, k);
ret = bch_btree_insert(s, &keylist, i->pin, NULL);
if (ret)
@ -383,16 +381,15 @@ retry:
b = best;
if (b) {
rw_lock(true, b, b->level);
mutex_lock(&b->write_lock);
if (!btree_current_write(b)->journal) {
rw_unlock(true, b);
mutex_unlock(&b->write_lock);
/* We raced */
goto retry;
}
bch_btree_node_write(b, NULL);
rw_unlock(true, b);
__bch_btree_node_write(b, NULL);
mutex_unlock(&b->write_lock);
}
}
@ -536,6 +533,7 @@ void bch_journal_next(struct journal *j)
atomic_set(&fifo_back(&j->pin), 1);
j->cur->data->seq = ++j->seq;
j->cur->dirty = false;
j->cur->need_write = false;
j->cur->data->keys = 0;
@ -731,7 +729,10 @@ static void journal_write_work(struct work_struct *work)
struct cache_set,
journal.work);
spin_lock(&c->journal.lock);
journal_try_write(c);
if (c->journal.cur->dirty)
journal_try_write(c);
else
spin_unlock(&c->journal.lock);
}
/*
@ -761,7 +762,8 @@ atomic_t *bch_journal(struct cache_set *c,
if (parent) {
closure_wait(&w->wait, parent);
journal_try_write(c);
} else if (!w->need_write) {
} else if (!w->dirty) {
w->dirty = true;
schedule_delayed_work(&c->journal.work,
msecs_to_jiffies(c->journal_delay_ms));
spin_unlock(&c->journal.lock);


@ -95,6 +95,7 @@ struct journal_write {
struct cache_set *c;
struct closure_waitlist wait;
bool dirty;
bool need_write;
};


@ -24,12 +24,10 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k)
moving_gc_keys);
unsigned i;
for (i = 0; i < KEY_PTRS(k); i++) {
struct bucket *g = PTR_BUCKET(c, k, i);
if (GC_MOVE(g))
for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i) &&
GC_MOVE(PTR_BUCKET(c, k, i)))
return true;
}
return false;
}
@ -115,7 +113,7 @@ static void write_moving(struct closure *cl)
closure_call(&op->cl, bch_data_insert, NULL, cl);
}
continue_at(cl, write_moving_finish, system_wq);
continue_at(cl, write_moving_finish, op->wq);
}
static void read_moving_submit(struct closure *cl)
@ -125,7 +123,7 @@ static void read_moving_submit(struct closure *cl)
bch_submit_bbio(bio, io->op.c, &io->w->key, 0);
continue_at(cl, write_moving, system_wq);
continue_at(cl, write_moving, io->op.wq);
}
static void read_moving(struct cache_set *c)
@ -160,6 +158,7 @@ static void read_moving(struct cache_set *c)
io->w = w;
io->op.inode = KEY_INODE(&w->key);
io->op.c = c;
io->op.wq = c->moving_gc_wq;
moving_init(io);
bio = &io->bio.bio;
@ -216,7 +215,10 @@ void bch_moving_gc(struct cache_set *c)
ca->heap.used = 0;
for_each_bucket(b, ca) {
if (!GC_SECTORS_USED(b))
if (GC_MARK(b) == GC_MARK_METADATA ||
!GC_SECTORS_USED(b) ||
GC_SECTORS_USED(b) == ca->sb.bucket_size ||
atomic_read(&b->pin))
continue;
if (!heap_full(&ca->heap)) {


@ -12,11 +12,9 @@
#include "request.h"
#include "writeback.h"
#include <linux/cgroup.h>
#include <linux/module.h>
#include <linux/hash.h>
#include <linux/random.h>
#include "blk-cgroup.h"
#include <trace/events/bcache.h>
@ -27,171 +25,13 @@ struct kmem_cache *bch_search_cache;
static void bch_data_insert_start(struct closure *);
/* Cgroup interface */
#ifdef CONFIG_CGROUP_BCACHE
static struct bch_cgroup bcache_default_cgroup = { .cache_mode = -1 };
static struct bch_cgroup *cgroup_to_bcache(struct cgroup *cgroup)
{
struct cgroup_subsys_state *css;
return cgroup &&
(css = cgroup_subsys_state(cgroup, bcache_subsys_id))
? container_of(css, struct bch_cgroup, css)
: &bcache_default_cgroup;
}
struct bch_cgroup *bch_bio_to_cgroup(struct bio *bio)
{
struct cgroup_subsys_state *css = bio->bi_css
? cgroup_subsys_state(bio->bi_css->cgroup, bcache_subsys_id)
: task_subsys_state(current, bcache_subsys_id);
return css
? container_of(css, struct bch_cgroup, css)
: &bcache_default_cgroup;
}
static ssize_t cache_mode_read(struct cgroup *cgrp, struct cftype *cft,
struct file *file,
char __user *buf, size_t nbytes, loff_t *ppos)
{
char tmp[1024];
int len = bch_snprint_string_list(tmp, PAGE_SIZE, bch_cache_modes,
cgroup_to_bcache(cgrp)->cache_mode + 1);
if (len < 0)
return len;
return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
}
static int cache_mode_write(struct cgroup *cgrp, struct cftype *cft,
const char *buf)
{
int v = bch_read_string_list(buf, bch_cache_modes);
if (v < 0)
return v;
cgroup_to_bcache(cgrp)->cache_mode = v - 1;
return 0;
}
static u64 bch_verify_read(struct cgroup *cgrp, struct cftype *cft)
{
return cgroup_to_bcache(cgrp)->verify;
}
static int bch_verify_write(struct cgroup *cgrp, struct cftype *cft, u64 val)
{
cgroup_to_bcache(cgrp)->verify = val;
return 0;
}
static u64 bch_cache_hits_read(struct cgroup *cgrp, struct cftype *cft)
{
struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp);
return atomic_read(&bcachecg->stats.cache_hits);
}
static u64 bch_cache_misses_read(struct cgroup *cgrp, struct cftype *cft)
{
struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp);
return atomic_read(&bcachecg->stats.cache_misses);
}
static u64 bch_cache_bypass_hits_read(struct cgroup *cgrp,
struct cftype *cft)
{
struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp);
return atomic_read(&bcachecg->stats.cache_bypass_hits);
}
static u64 bch_cache_bypass_misses_read(struct cgroup *cgrp,
struct cftype *cft)
{
struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp);
return atomic_read(&bcachecg->stats.cache_bypass_misses);
}
static struct cftype bch_files[] = {
{
.name = "cache_mode",
.read = cache_mode_read,
.write_string = cache_mode_write,
},
{
.name = "verify",
.read_u64 = bch_verify_read,
.write_u64 = bch_verify_write,
},
{
.name = "cache_hits",
.read_u64 = bch_cache_hits_read,
},
{
.name = "cache_misses",
.read_u64 = bch_cache_misses_read,
},
{
.name = "cache_bypass_hits",
.read_u64 = bch_cache_bypass_hits_read,
},
{
.name = "cache_bypass_misses",
.read_u64 = bch_cache_bypass_misses_read,
},
{ } /* terminate */
};
static void init_bch_cgroup(struct bch_cgroup *cg)
{
cg->cache_mode = -1;
}
static struct cgroup_subsys_state *bcachecg_create(struct cgroup *cgroup)
{
struct bch_cgroup *cg;
cg = kzalloc(sizeof(*cg), GFP_KERNEL);
if (!cg)
return ERR_PTR(-ENOMEM);
init_bch_cgroup(cg);
return &cg->css;
}
static void bcachecg_destroy(struct cgroup *cgroup)
{
struct bch_cgroup *cg = cgroup_to_bcache(cgroup);
kfree(cg);
}
struct cgroup_subsys bcache_subsys = {
.create = bcachecg_create,
.destroy = bcachecg_destroy,
.subsys_id = bcache_subsys_id,
.name = "bcache",
.module = THIS_MODULE,
};
EXPORT_SYMBOL_GPL(bcache_subsys);
#endif
static unsigned cache_mode(struct cached_dev *dc, struct bio *bio)
{
#ifdef CONFIG_CGROUP_BCACHE
int r = bch_bio_to_cgroup(bio)->cache_mode;
if (r >= 0)
return r;
#endif
return BDEV_CACHE_MODE(&dc->sb);
}
static bool verify(struct cached_dev *dc, struct bio *bio)
{
#ifdef CONFIG_CGROUP_BCACHE
if (bch_bio_to_cgroup(bio)->verify)
return true;
#endif
return dc->verify;
}
@ -248,7 +88,7 @@ static void bch_data_insert_keys(struct closure *cl)
atomic_dec_bug(journal_ref);
if (!op->insert_data_done)
continue_at(cl, bch_data_insert_start, bcache_wq);
continue_at(cl, bch_data_insert_start, op->wq);
bch_keylist_free(&op->insert_keys);
closure_return(cl);
@ -297,7 +137,7 @@ static void bch_data_invalidate(struct closure *cl)
op->insert_data_done = true;
bio_put(bio);
out:
continue_at(cl, bch_data_insert_keys, bcache_wq);
continue_at(cl, bch_data_insert_keys, op->wq);
}
static void bch_data_insert_error(struct closure *cl)
@ -340,7 +180,7 @@ static void bch_data_insert_endio(struct bio *bio, int error)
if (op->writeback)
op->error = error;
else if (!op->replace)
set_closure_fn(cl, bch_data_insert_error, bcache_wq);
set_closure_fn(cl, bch_data_insert_error, op->wq);
else
set_closure_fn(cl, NULL, NULL);
}
@ -376,7 +216,7 @@ static void bch_data_insert_start(struct closure *cl)
if (bch_keylist_realloc(&op->insert_keys,
3 + (op->csum ? 1 : 0),
op->c))
continue_at(cl, bch_data_insert_keys, bcache_wq);
continue_at(cl, bch_data_insert_keys, op->wq);
k = op->insert_keys.top;
bkey_init(k);
@ -413,7 +253,7 @@ static void bch_data_insert_start(struct closure *cl)
} while (n != bio);
op->insert_data_done = true;
continue_at(cl, bch_data_insert_keys, bcache_wq);
continue_at(cl, bch_data_insert_keys, op->wq);
err:
/* bch_alloc_sectors() blocks if s->writeback = true */
BUG_ON(op->writeback);
@ -442,7 +282,7 @@ err:
bio_put(bio);
if (!bch_keylist_empty(&op->insert_keys))
continue_at(cl, bch_data_insert_keys, bcache_wq);
continue_at(cl, bch_data_insert_keys, op->wq);
else
closure_return(cl);
}
@ -824,6 +664,7 @@ static inline struct search *search_alloc(struct bio *bio,
s->iop.error = 0;
s->iop.flags = 0;
s->iop.flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0;
s->iop.wq = bcache_wq;
return s;
}
@ -1203,22 +1044,13 @@ void bch_cached_dev_request_init(struct cached_dev *dc)
static int flash_dev_cache_miss(struct btree *b, struct search *s,
struct bio *bio, unsigned sectors)
{
struct bio_vec bv;
struct bvec_iter iter;
unsigned bytes = min(sectors, bio_sectors(bio)) << 9;
/* Zero fill bio */
swap(bio->bi_iter.bi_size, bytes);
zero_fill_bio(bio);
swap(bio->bi_iter.bi_size, bytes);
bio_for_each_segment(bv, bio, iter) {
unsigned j = min(bv.bv_len >> 9, sectors);
void *p = kmap(bv.bv_page);
memset(p + bv.bv_offset, 0, j << 9);
kunmap(bv.bv_page);
sectors -= j;
}
bio_advance(bio, min(sectors << 9, bio->bi_iter.bi_size));
bio_advance(bio, bytes);
if (!bio->bi_iter.bi_size)
return MAP_DONE;
@ -1313,9 +1145,6 @@ void bch_flash_dev_request_init(struct bcache_device *d)
void bch_request_exit(void)
{
#ifdef CONFIG_CGROUP_BCACHE
cgroup_unload_subsys(&bcache_subsys);
#endif
if (bch_search_cache)
kmem_cache_destroy(bch_search_cache);
}
@ -1326,11 +1155,5 @@ int __init bch_request_init(void)
if (!bch_search_cache)
return -ENOMEM;
#ifdef CONFIG_CGROUP_BCACHE
cgroup_load_subsys(&bcache_subsys);
init_bch_cgroup(&bcache_default_cgroup);
cgroup_add_cftypes(&bcache_subsys, bch_files);
#endif
return 0;
}


@ -1,12 +1,11 @@
#ifndef _BCACHE_REQUEST_H_
#define _BCACHE_REQUEST_H_
#include <linux/cgroup.h>
struct data_insert_op {
struct closure cl;
struct cache_set *c;
struct bio *bio;
struct workqueue_struct *wq;
unsigned inode;
uint16_t write_point;
@ -41,20 +40,4 @@ void bch_flash_dev_request_init(struct bcache_device *d);
extern struct kmem_cache *bch_search_cache, *bch_passthrough_cache;
struct bch_cgroup {
#ifdef CONFIG_CGROUP_BCACHE
struct cgroup_subsys_state css;
#endif
/*
* We subtract one from the index into bch_cache_modes[], so that
* default == -1; this makes it so the rest match up with d->cache_mode,
* and we use d->cache_mode if cgrp->cache_mode < 0
*/
short cache_mode;
bool verify;
struct cache_stat_collector stats;
};
struct bch_cgroup *bch_bio_to_cgroup(struct bio *bio);
#endif /* _BCACHE_REQUEST_H_ */


@ -201,9 +201,6 @@ void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d,
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
mark_cache_stats(&dc->accounting.collector, hit, bypass);
mark_cache_stats(&c->accounting.collector, hit, bypass);
#ifdef CONFIG_CGROUP_BCACHE
mark_cache_stats(&(bch_bio_to_cgroup(s->orig_bio)->stats), hit, bypass);
#endif
}
void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d)


@ -541,9 +541,6 @@ static void prio_io(struct cache *ca, uint64_t bucket, unsigned long rw)
closure_sync(cl);
}
#define buckets_free(c) "free %zu, free_inc %zu, unused %zu", \
fifo_used(&c->free), fifo_used(&c->free_inc), fifo_used(&c->unused)
void bch_prio_write(struct cache *ca)
{
int i;
@ -554,10 +551,6 @@ void bch_prio_write(struct cache *ca)
lockdep_assert_held(&ca->set->bucket_lock);
for (b = ca->buckets;
b < ca->buckets + ca->sb.nbuckets; b++)
b->disk_gen = b->gen;
ca->disk_buckets->seq++;
atomic_long_add(ca->sb.bucket_size * prio_buckets(ca),
@ -601,14 +594,17 @@ void bch_prio_write(struct cache *ca)
mutex_lock(&ca->set->bucket_lock);
ca->need_save_prio = 0;
/*
* Don't want the old priorities to get garbage collected until after we
* finish writing the new ones, and they're journalled
*/
for (i = 0; i < prio_buckets(ca); i++)
for (i = 0; i < prio_buckets(ca); i++) {
if (ca->prio_last_buckets[i])
__bch_bucket_free(ca,
&ca->buckets[ca->prio_last_buckets[i]]);
ca->prio_last_buckets[i] = ca->prio_buckets[i];
}
}
static void prio_read(struct cache *ca, uint64_t bucket)
@ -639,7 +635,7 @@ static void prio_read(struct cache *ca, uint64_t bucket)
}
b->prio = le16_to_cpu(d->prio);
b->gen = b->disk_gen = b->last_gc = b->gc_gen = d->gen;
b->gen = b->last_gc = d->gen;
}
}
@ -843,6 +839,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
q->limits.max_segment_size = UINT_MAX;
q->limits.max_segments = BIO_MAX_PAGES;
q->limits.max_discard_sectors = UINT_MAX;
q->limits.discard_granularity = 512;
q->limits.io_min = block_size;
q->limits.logical_block_size = block_size;
q->limits.physical_block_size = block_size;
@ -1355,6 +1352,8 @@ static void cache_set_free(struct closure *cl)
bch_bset_sort_state_free(&c->sort);
free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
if (c->moving_gc_wq)
destroy_workqueue(c->moving_gc_wq);
if (c->bio_split)
bioset_free(c->bio_split);
if (c->fill_iter)
@ -1395,14 +1394,21 @@ static void cache_set_flush(struct closure *cl)
list_add(&c->root->list, &c->btree_cache);
/* Should skip this if we're unregistering because of an error */
list_for_each_entry(b, &c->btree_cache, list)
list_for_each_entry(b, &c->btree_cache, list) {
mutex_lock(&b->write_lock);
if (btree_node_dirty(b))
bch_btree_node_write(b, NULL);
__bch_btree_node_write(b, NULL);
mutex_unlock(&b->write_lock);
}
for_each_cache(ca, c, i)
if (ca->alloc_thread)
kthread_stop(ca->alloc_thread);
cancel_delayed_work_sync(&c->journal.work);
/* flush last journal entry if needed */
c->journal.work.work.func(&c->journal.work.work);
closure_return(cl);
}
@ -1485,14 +1491,13 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
sema_init(&c->sb_write_mutex, 1);
mutex_init(&c->bucket_lock);
init_waitqueue_head(&c->try_wait);
init_waitqueue_head(&c->btree_cache_wait);
init_waitqueue_head(&c->bucket_wait);
sema_init(&c->uuid_write_mutex, 1);
spin_lock_init(&c->btree_gc_time.lock);
spin_lock_init(&c->btree_split_time.lock);
spin_lock_init(&c->btree_read_time.lock);
spin_lock_init(&c->try_harder_time.lock);
bch_moving_init_cache_set(c);
@ -1517,6 +1522,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
!(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
!(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
!(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
!(c->moving_gc_wq = create_workqueue("bcache_gc")) ||
bch_journal_alloc(c) ||
bch_btree_cache_alloc(c) ||
bch_open_buckets_alloc(c) ||
@ -1580,7 +1586,7 @@ static void run_cache_set(struct cache_set *c)
goto err;
err = "error reading btree root";
c->root = bch_btree_node_get(c, k, j->btree_level, true);
c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true);
if (IS_ERR_OR_NULL(c->root))
goto err;
@ -1596,7 +1602,7 @@ static void run_cache_set(struct cache_set *c)
goto err;
bch_journal_mark(c, &journal);
bch_btree_gc_finish(c);
bch_initial_gc_finish(c);
pr_debug("btree_check() done");
/*
@ -1638,7 +1644,7 @@ static void run_cache_set(struct cache_set *c)
ca->sb.d[j] = ca->sb.first_bucket + j;
}
bch_btree_gc_finish(c);
bch_initial_gc_finish(c);
err = "error starting allocator thread";
for_each_cache(ca, c, i)
@ -1655,12 +1661,14 @@ static void run_cache_set(struct cache_set *c)
goto err;
err = "cannot allocate new btree root";
c->root = bch_btree_node_alloc(c, 0, true);
c->root = bch_btree_node_alloc(c, NULL, 0);
if (IS_ERR_OR_NULL(c->root))
goto err;
mutex_lock(&c->root->write_lock);
bkey_copy_key(&c->root->key, &MAX_KEY);
bch_btree_node_write(c->root, &cl);
mutex_unlock(&c->root->write_lock);
bch_btree_set_root(c->root);
rw_unlock(true, c->root);
@ -1782,7 +1790,6 @@ void bch_cache_release(struct kobject *kobj)
vfree(ca->buckets);
free_heap(&ca->heap);
free_fifo(&ca->unused);
free_fifo(&ca->free_inc);
for (i = 0; i < RESERVE_NR; i++)
@ -1819,7 +1826,6 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
!init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) ||
!init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) ||
!init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) ||
!init_fifo(&ca->unused, free << 2, GFP_KERNEL) ||
!init_heap(&ca->heap, free << 3, GFP_KERNEL) ||
!(ca->buckets = vzalloc(sizeof(struct bucket) *
ca->sb.nbuckets)) ||
@ -1834,13 +1840,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
for_each_bucket(b, ca)
atomic_set(&b->pin, 0);
if (bch_cache_allocator_init(ca))
goto err;
return 0;
err:
kobject_put(&ca->kobj);
return -ENOMEM;
}
static void register_cache(struct cache_sb *sb, struct page *sb_page,
@ -1869,7 +1869,10 @@ static void register_cache(struct cache_sb *sb, struct page *sb_page,
if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache"))
goto err;
mutex_lock(&bch_register_lock);
err = register_cache_set(ca);
mutex_unlock(&bch_register_lock);
if (err)
goto err;
@ -1931,8 +1934,6 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (!try_module_get(THIS_MODULE))
return -EBUSY;
mutex_lock(&bch_register_lock);
if (!(path = kstrndup(buffer, size, GFP_KERNEL)) ||
!(sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL)))
goto err;
@ -1965,7 +1966,9 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (!dc)
goto err_close;
mutex_lock(&bch_register_lock);
register_bdev(sb, sb_page, bdev, dc);
mutex_unlock(&bch_register_lock);
} else {
struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
if (!ca)
@ -1978,7 +1981,6 @@ out:
put_page(sb_page);
kfree(sb);
kfree(path);
mutex_unlock(&bch_register_lock);
module_put(THIS_MODULE);
return ret;
@ -2057,7 +2059,6 @@ static void bcache_exit(void)
{
bch_debug_exit();
bch_request_exit();
bch_btree_exit();
if (bcache_kobj)
kobject_put(bcache_kobj);
if (bcache_wq)
@ -2087,7 +2088,6 @@ static int __init bcache_init(void)
if (!(bcache_wq = create_workqueue("bcache")) ||
!(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
sysfs_create_files(bcache_kobj, files) ||
bch_btree_init() ||
bch_request_init() ||
bch_debug_init(bcache_kobj))
goto err;

View file

@ -54,7 +54,6 @@ sysfs_time_stats_attribute(btree_gc, sec, ms);
sysfs_time_stats_attribute(btree_split, sec, us);
sysfs_time_stats_attribute(btree_sort, ms, us);
sysfs_time_stats_attribute(btree_read, ms, us);
sysfs_time_stats_attribute(try_harder, ms, us);
read_attribute(btree_nodes);
read_attribute(btree_used_percent);
@ -406,7 +405,7 @@ struct bset_stats_op {
struct bset_stats stats;
};
static int btree_bset_stats(struct btree_op *b_op, struct btree *b)
static int bch_btree_bset_stats(struct btree_op *b_op, struct btree *b)
{
struct bset_stats_op *op = container_of(b_op, struct bset_stats_op, op);
@ -424,7 +423,7 @@ static int bch_bset_print_stats(struct cache_set *c, char *buf)
memset(&op, 0, sizeof(op));
bch_btree_op_init(&op.op, -1);
ret = bch_btree_map_nodes(&op.op, c, &ZERO_KEY, btree_bset_stats);
ret = bch_btree_map_nodes(&op.op, c, &ZERO_KEY, bch_btree_bset_stats);
if (ret < 0)
return ret;
@ -442,81 +441,81 @@ static int bch_bset_print_stats(struct cache_set *c, char *buf)
op.stats.floats, op.stats.failed);
}
static unsigned bch_root_usage(struct cache_set *c)
{
unsigned bytes = 0;
struct bkey *k;
struct btree *b;
struct btree_iter iter;
goto lock_root;
do {
rw_unlock(false, b);
lock_root:
b = c->root;
rw_lock(false, b, b->level);
} while (b != c->root);
for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
bytes += bkey_bytes(k);
rw_unlock(false, b);
return (bytes * 100) / btree_bytes(c);
}
static size_t bch_cache_size(struct cache_set *c)
{
size_t ret = 0;
struct btree *b;
mutex_lock(&c->bucket_lock);
list_for_each_entry(b, &c->btree_cache, list)
ret += 1 << (b->keys.page_order + PAGE_SHIFT);
mutex_unlock(&c->bucket_lock);
return ret;
}
static unsigned bch_cache_max_chain(struct cache_set *c)
{
unsigned ret = 0;
struct hlist_head *h;
mutex_lock(&c->bucket_lock);
for (h = c->bucket_hash;
h < c->bucket_hash + (1 << BUCKET_HASH_BITS);
h++) {
unsigned i = 0;
struct hlist_node *p;
hlist_for_each(p, h)
i++;
ret = max(ret, i);
}
mutex_unlock(&c->bucket_lock);
return ret;
}
static unsigned bch_btree_used(struct cache_set *c)
{
return div64_u64(c->gc_stats.key_bytes * 100,
(c->gc_stats.nodes ?: 1) * btree_bytes(c));
}
static unsigned bch_average_key_size(struct cache_set *c)
{
return c->gc_stats.nkeys
? div64_u64(c->gc_stats.data, c->gc_stats.nkeys)
: 0;
}
SHOW(__bch_cache_set)
{
unsigned root_usage(struct cache_set *c)
{
unsigned bytes = 0;
struct bkey *k;
struct btree *b;
struct btree_iter iter;
goto lock_root;
do {
rw_unlock(false, b);
lock_root:
b = c->root;
rw_lock(false, b, b->level);
} while (b != c->root);
for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
bytes += bkey_bytes(k);
rw_unlock(false, b);
return (bytes * 100) / btree_bytes(c);
}
size_t cache_size(struct cache_set *c)
{
size_t ret = 0;
struct btree *b;
mutex_lock(&c->bucket_lock);
list_for_each_entry(b, &c->btree_cache, list)
ret += 1 << (b->keys.page_order + PAGE_SHIFT);
mutex_unlock(&c->bucket_lock);
return ret;
}
unsigned cache_max_chain(struct cache_set *c)
{
unsigned ret = 0;
struct hlist_head *h;
mutex_lock(&c->bucket_lock);
for (h = c->bucket_hash;
h < c->bucket_hash + (1 << BUCKET_HASH_BITS);
h++) {
unsigned i = 0;
struct hlist_node *p;
hlist_for_each(p, h)
i++;
ret = max(ret, i);
}
mutex_unlock(&c->bucket_lock);
return ret;
}
unsigned btree_used(struct cache_set *c)
{
return div64_u64(c->gc_stats.key_bytes * 100,
(c->gc_stats.nodes ?: 1) * btree_bytes(c));
}
unsigned average_key_size(struct cache_set *c)
{
return c->gc_stats.nkeys
? div64_u64(c->gc_stats.data, c->gc_stats.nkeys)
: 0;
}
struct cache_set *c = container_of(kobj, struct cache_set, kobj);
sysfs_print(synchronous, CACHE_SYNC(&c->sb));
@ -524,21 +523,20 @@ lock_root:
sysfs_hprint(bucket_size, bucket_bytes(c));
sysfs_hprint(block_size, block_bytes(c));
sysfs_print(tree_depth, c->root->level);
sysfs_print(root_usage_percent, root_usage(c));
sysfs_print(root_usage_percent, bch_root_usage(c));
sysfs_hprint(btree_cache_size, cache_size(c));
sysfs_print(btree_cache_max_chain, cache_max_chain(c));
sysfs_hprint(btree_cache_size, bch_cache_size(c));
sysfs_print(btree_cache_max_chain, bch_cache_max_chain(c));
sysfs_print(cache_available_percent, 100 - c->gc_stats.in_use);
sysfs_print_time_stats(&c->btree_gc_time, btree_gc, sec, ms);
sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us);
sysfs_print_time_stats(&c->sort.time, btree_sort, ms, us);
sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us);
sysfs_print_time_stats(&c->try_harder_time, try_harder, ms, us);
sysfs_print(btree_used_percent, btree_used(c));
sysfs_print(btree_used_percent, bch_btree_used(c));
sysfs_print(btree_nodes, c->gc_stats.nodes);
sysfs_hprint(average_key_size, average_key_size(c));
sysfs_hprint(average_key_size, bch_average_key_size(c));
sysfs_print(cache_read_races,
atomic_long_read(&c->cache_read_races));
@ -709,7 +707,6 @@ static struct attribute *bch_cache_set_internal_files[] = {
sysfs_time_stats_attribute_list(btree_split, sec, us)
sysfs_time_stats_attribute_list(btree_sort, ms, us)
sysfs_time_stats_attribute_list(btree_read, ms, us)
sysfs_time_stats_attribute_list(try_harder, ms, us)
&sysfs_btree_nodes,
&sysfs_btree_used_percent,
@ -761,7 +758,9 @@ SHOW(__bch_cache)
int cmp(const void *l, const void *r)
{ return *((uint16_t *) r) - *((uint16_t *) l); }
size_t n = ca->sb.nbuckets, i, unused, btree;
struct bucket *b;
size_t n = ca->sb.nbuckets, i;
size_t unused = 0, available = 0, dirty = 0, meta = 0;
uint64_t sum = 0;
/* Compute 31 quantiles */
uint16_t q[31], *p, *cached;
@ -772,6 +771,17 @@ SHOW(__bch_cache)
return -ENOMEM;
mutex_lock(&ca->set->bucket_lock);
for_each_bucket(b, ca) {
if (!GC_SECTORS_USED(b))
unused++;
if (GC_MARK(b) == GC_MARK_RECLAIMABLE)
available++;
if (GC_MARK(b) == GC_MARK_DIRTY)
dirty++;
if (GC_MARK(b) == GC_MARK_METADATA)
meta++;
}
for (i = ca->sb.first_bucket; i < n; i++)
p[i] = ca->buckets[i].prio;
mutex_unlock(&ca->set->bucket_lock);
@ -786,10 +796,7 @@ SHOW(__bch_cache)
while (cached < p + n &&
*cached == BTREE_PRIO)
cached++;
btree = cached - p;
n -= btree;
cached++, n--;
for (i = 0; i < n; i++)
sum += INITIAL_PRIO - cached[i];
@ -805,12 +812,16 @@ SHOW(__bch_cache)
ret = scnprintf(buf, PAGE_SIZE,
"Unused: %zu%%\n"
"Clean: %zu%%\n"
"Dirty: %zu%%\n"
"Metadata: %zu%%\n"
"Average: %llu\n"
"Sectors per Q: %zu\n"
"Quantiles: [",
unused * 100 / (size_t) ca->sb.nbuckets,
btree * 100 / (size_t) ca->sb.nbuckets, sum,
available * 100 / (size_t) ca->sb.nbuckets,
dirty * 100 / (size_t) ca->sb.nbuckets,
meta * 100 / (size_t) ca->sb.nbuckets, sum,
n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1));
for (i = 0; i < ARRAY_SIZE(q); i++)

View file

@ -45,7 +45,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_invalidate);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_invalidate);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_fail);
EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);

View file

@ -327,12 +327,6 @@ enum drbd_state_rv {
SS_AFTER_LAST_ERROR = -22, /* Keep this at bottom */
};
/* from drbd_strings.c */
extern const char *drbd_conn_str(enum drbd_conns);
extern const char *drbd_role_str(enum drbd_role);
extern const char *drbd_disk_str(enum drbd_disk_state);
extern const char *drbd_set_st_err_str(enum drbd_state_rv);
#define SHARED_SECRET_MAX 64
#define MDF_CONSISTENT (1 << 0)
@ -382,4 +376,6 @@ enum drbd_timeout_flag {
#define DRBD_MD_INDEX_FLEX_EXT -2
#define DRBD_MD_INDEX_FLEX_INT -3
#define DRBD_CPU_MASK_SIZE 32
#endif

View file

@ -135,7 +135,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
)
GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
__str_field_def(1, DRBD_GENLA_F_MANDATORY, cpu_mask, 32)
__str_field_def(1, DRBD_GENLA_F_MANDATORY, cpu_mask, DRBD_CPU_MASK_SIZE)
__u32_field_def(2, DRBD_GENLA_F_MANDATORY, on_no_data, DRBD_ON_NO_DATA_DEF)
)
@ -276,9 +276,9 @@ GENL_op(
)
/* add DRBD minor devices as volumes to resources */
GENL_op(DRBD_ADM_NEW_MINOR, 5, GENL_doit(drbd_adm_add_minor),
GENL_op(DRBD_ADM_NEW_MINOR, 5, GENL_doit(drbd_adm_new_minor),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor),
GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_del_minor),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
/* add or delete resources */
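(The renamed admin handlers above are wired in through GENL_doit(), i.e. they are ordinary generic-netlink doit callbacks. As a sketch only — prototypes inferred from the genetlink framework, not shown in this hunk — they would be declared along these lines:)

	/* assumed prototypes for the renamed handlers */
	int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
	int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);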

View file

@ -85,6 +85,7 @@ void idr_remove(struct idr *idp, int id);
void idr_free(struct idr *idp, int id);
void idr_destroy(struct idr *idp);
void idr_init(struct idr *idp);
bool idr_is_empty(struct idr *idp);
/**
* idr_preload_end - end preload section started with idr_preload()

View file

@ -399,26 +399,43 @@ TRACE_EVENT(bcache_keyscan,
/* Allocator */
TRACE_EVENT(bcache_alloc_invalidate,
TP_PROTO(struct cache *ca),
TP_ARGS(ca),
TRACE_EVENT(bcache_invalidate,
TP_PROTO(struct cache *ca, size_t bucket),
TP_ARGS(ca, bucket),
TP_STRUCT__entry(
__field(unsigned, free )
__field(unsigned, free_inc )
__field(unsigned, free_inc_size )
__field(unsigned, unused )
__field(unsigned, sectors )
__field(dev_t, dev )
__field(__u64, offset )
),
TP_fast_assign(
__entry->free = fifo_used(&ca->free[RESERVE_NONE]);
__entry->free_inc = fifo_used(&ca->free_inc);
__entry->free_inc_size = ca->free_inc.size;
__entry->unused = fifo_used(&ca->unused);
__entry->dev = ca->bdev->bd_dev;
__entry->offset = bucket << ca->set->bucket_bits;
__entry->sectors = GC_SECTORS_USED(&ca->buckets[bucket]);
),
TP_printk("free %u free_inc %u/%u unused %u", __entry->free,
__entry->free_inc, __entry->free_inc_size, __entry->unused)
TP_printk("invalidated %u sectors at %d,%d sector=%llu",
__entry->sectors, MAJOR(__entry->dev),
MINOR(__entry->dev), __entry->offset)
);
TRACE_EVENT(bcache_alloc,
TP_PROTO(struct cache *ca, size_t bucket),
TP_ARGS(ca, bucket),
TP_STRUCT__entry(
__field(dev_t, dev )
__field(__u64, offset )
),
TP_fast_assign(
__entry->dev = ca->bdev->bd_dev;
__entry->offset = bucket << ca->set->bucket_bits;
),
TP_printk("allocated %d,%d sector=%llu", MAJOR(__entry->dev),
MINOR(__entry->dev), __entry->offset)
);
TRACE_EVENT(bcache_alloc_fail,
@ -426,21 +443,22 @@ TRACE_EVENT(bcache_alloc_fail,
TP_ARGS(ca, reserve),
TP_STRUCT__entry(
__field(dev_t, dev )
__field(unsigned, free )
__field(unsigned, free_inc )
__field(unsigned, unused )
__field(unsigned, blocked )
),
TP_fast_assign(
__entry->dev = ca->bdev->bd_dev;
__entry->free = fifo_used(&ca->free[reserve]);
__entry->free_inc = fifo_used(&ca->free_inc);
__entry->unused = fifo_used(&ca->unused);
__entry->blocked = atomic_read(&ca->set->prio_blocked);
),
TP_printk("free %u free_inc %u unused %u blocked %u", __entry->free,
__entry->free_inc, __entry->unused, __entry->blocked)
TP_printk("alloc fail %d,%d free %u free_inc %u blocked %u",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->free,
__entry->free_inc, __entry->blocked)
);
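(The reworked allocator events take the cache and a bucket index directly, so TRACE_EVENT() generates trace_bcache_invalidate(ca, bucket), trace_bcache_alloc(ca, bucket) and trace_bcache_alloc_fail(ca, reserve) helpers. The fragment below is a usage sketch only; example_alloc_path() is a hypothetical wrapper and the call sites are assumptions, not part of this diff:)

	/* sketch of the call sites the new events imply; the trace_*()
	 * helpers are generated from the TP_PROTOs above */
	static void example_alloc_path(struct cache *ca, size_t bucket,
				       unsigned reserve)
	{
		/* a bucket has been picked for invalidation */
		trace_bcache_invalidate(ca, bucket);

		/* a bucket has actually been handed out to a caller */
		trace_bcache_alloc(ca, bucket);

		/* the requested reserve ran dry */
		trace_bcache_alloc_fail(ca, reserve);
	}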
/* Background writeback */

View file

@ -869,6 +869,16 @@ void idr_init(struct idr *idp)
}
EXPORT_SYMBOL(idr_init);
static int idr_has_entry(int id, void *p, void *data)
{
return 1;
}
bool idr_is_empty(struct idr *idp)
{
return !idr_for_each(idp, idr_has_entry, NULL);
}
EXPORT_SYMBOL(idr_is_empty);
/**
* DOC: IDA description
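(The new idr_is_empty() helper lets a caller check whether an idr still maps any objects before tearing down the structure that owns it. A minimal usage sketch follows; struct parent, its children idr and free_parent() are illustrative assumptions, not names taken from this series:)

	#include <linux/idr.h>

	/* hypothetical teardown path: free the owning object only once its
	 * idr no longer maps any children */
	static void maybe_free_parent(struct parent *p)
	{
		if (idr_is_empty(&p->children))
			free_parent(p);		/* assumed destructor */
	}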