This patch adds read balancing to the dm-raid1 mirror target. Mirrors are
selected round-robin, with MIN_READS (128) requests going to each device in turn.
From: Jonathan E Brassow <jbrassow@redhat.com>
[The hard-coded MIN_READS value (128) needs changing into a runtime parameter.]
Index: linux-2.6.19-rc4/drivers/md/dm-raid1.c
===================================================================
--- linux-2.6.19-rc4.orig/drivers/md/dm-raid1.c 2006-11-01 21:40:51.000000000 +0000
+++ linux-2.6.19-rc4/drivers/md/dm-raid1.c 2006-11-01 21:40:53.000000000 +0000
@@ -129,6 +129,9 @@ struct mirror_set {
struct workqueue_struct *kmirrord_wq;
struct work_struct kmirrord_work;
+ atomic_t read_count; /* Read counter for read balancing */
+ struct mirror *read_mirror; /* Last mirror read. */
+
unsigned int nr_mirrors;
struct mirror mirror[0];
};
@@ -684,10 +687,46 @@ static void do_recovery(struct mirror_se
/*-----------------------------------------------------------------
* Reads
*---------------------------------------------------------------*/
-static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector)
+/* Value read_count is reset to each time the round-robin moves to another mirror */
+#define MIN_READS 128
+
+/* choose_mirror - select the mirror that will service the next balanced read
+ * @ms: the mirror set
+ *
+ * Updates ms->read_mirror and ms->read_count as a side effect.
+ * NOTE(review): the visible alloc_context hunk does not set read_count - confirm it starts at MIN_READS.
+ * Returns: chosen mirror, or NULL if no error-free mirror was found
+ */
+static struct mirror *choose_mirror(struct mirror_set *ms)
{
- /* FIXME: add read balancing */
- return ms->default_mirror;
+ struct mirror *start_mirror = ms->read_mirror;
+
+ /*
+ * Keep using read_mirror while it has no errors and read_count has not
+ * reached zero (read_count is only decremented for an error-free mirror).
+ * Otherwise reset read_count and step backwards through ms->mirror[],
+ * wrapping from the first entry to the last, until back at start_mirror. */
+ do {
+ if (likely(!atomic_read(&ms->read_mirror->error_count) &&
+ !atomic_dec_and_test(&ms->read_count)))
+ goto use_mirror;
+
+ atomic_set(&ms->read_count, MIN_READS);
+
+ if (ms->read_mirror-- == ms->mirror)
+ ms->read_mirror += ms->nr_mirrors;
+
+ } while (ms->read_mirror != start_mirror);
+
+ /*
+ * Every mirror was rejected, so read_mirror is start_mirror again.
+ * Hand it out unless its error_count is non-zero.
+ */
+ if (unlikely(atomic_read(&ms->read_mirror->error_count)))
+ return NULL;
+
+use_mirror:
+ return ms->read_mirror;
}
/*
@@ -712,7 +751,7 @@ static void do_reads(struct mirror_set *
* We can only read balance if the region is in sync.
*/
if (rh_in_sync(&ms->rh, region, 0))
- m = choose_mirror(ms, bio->bi_sector);
+ m = choose_mirror(ms);
else
m = ms->default_mirror;
@@ -898,6 +937,7 @@ static struct mirror_set *alloc_context(
ms->nr_regions = dm_sector_div_up(ti->len, region_size);
ms->in_sync = 0;
ms->default_mirror = &ms->mirror[DEFAULT_MIRROR];
+ ms->read_mirror = &ms->mirror[DEFAULT_MIRROR];
if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) {
ti->error = "Error creating dirty region hash";
@@ -1147,7 +1187,7 @@ static int mirror_map(struct dm_target *
return DM_MAPIO_SUBMITTED;
}
- m = choose_mirror(ms, bio->bi_sector);
+ m = choose_mirror(ms);
if (!m)
return -EIO;