This implements a loopback target for device mapper allowing a regular
file to be treated as a block device.
Signed-off-by: Bryn Reeves <breeves@redhat.com>
drivers/md/dm-loop.c | 648 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 648 insertions(+)
Index: linux-2.6.19/drivers/md/dm-loop.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.19/drivers/md/dm-loop.c 2006-12-06 20:49:43.000000000 +0000
@@ -0,0 +1,648 @@
+/*
+ * Copyright (C) 2006 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of device-mapper.
+ *
+ * Extent mapping implementation heavily influenced by mm/swapfile.c
+ *
+ * This file is released under the GPL.
+ *
+ */
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/syscalls.h>
+#include <linux/file.h>
+#include <linux/bio.h>
+
+#include "dm.h"
+#include "dm-bio-list.h"
+#include "dm-bio-record.h"
+
+#define DM_MSG_PREFIX "loop" /* presumably picked up by the DM* logging macros from dm.h - TODO confirm */
+#define LOOP_MAX_EXTENTS 1024 /* capacity of the initial extent map; bounds the scan in setup_loop_extents() */
+
+#define DMLOOP_READONLY 0x01 /* loop_c.flags bit: set by loop_ctr() when the table mode lacks FMODE_WRITE */
+#define DMLOOP_SYNC 0x02 /* loop_c.flags bit: not referenced anywhere else in this patch */
+
+typedef enum { /* kind of an extent, stored in struct extent.type */
+ DMLOOP_DEV /* extent is remapped onto the backing block device by do_remap_dev_bio() */
+} dm_extent_t;
+
+struct extent { /* one run of the mapped area that is contiguous on the backing device */
+ sector_t start; /* first sector of the run, relative to the start of the mapped area */
+ sector_t len; /* length of the run in sectors */
+ dm_extent_t type; /* extent kind; DMLOOP_DEV is the only one defined */
+ u64 data; /* for DMLOOP_DEV: sector on the backing device where the run begins */
+};
+
+struct extent_map { /* header followed by the extents themselves; sized with DMLOOP_MAP_SIZE() */
+ int nr_extents; /* number of valid entries in extents[] */
+ int cur_extent; /* index of the extent that last matched in find_extent(); tried first on the next lookup */
+ struct extent extents[0]; /* trailing variable-length array */
+};
+
+#define DMLOOP_MAP_SIZE(x) ((x)*sizeof(struct extent)+sizeof(struct extent_map)) /* bytes for a map holding x extents */
+
+/* DMLOOP_EXTENT(x) yields a pointer to extent x; it expects a `struct extent_map *map` in scope */
+#define DMLOOP_EXTENT(x) ((struct extent *)(&map->extents[(x)]))
+#define DMLOOP_EXTENT_TYPE(x) ((x)->type) /* x is a struct extent pointer */
+
+/* dm-loop context */
+struct loop_c {
+ int flags; /* DMLOOP_* bits */
+
+ /* information describing the backing store */
+ struct file *filp; /* backing file, opened by loop_get_file() */
+ struct block_device *bdev; /* device under the file's filesystem (inode->i_sb->s_bdev) */
+ char name[BDEVNAME_SIZE + 1]; /* bdevname() of bdev, or "none" */
+ struct extent_map *map; /* extent table built by setup_loop_extents() */
+ unsigned blkbits; /* inode->i_blkbits of the backing file */
+ loff_t offset; /* byte offset into the file at which the mapping starts (second table argument) */
+
+ sector_t sectors; /* size of the entire file in sectors, set by loop_setup_size() */
+ loff_t size; /* size of entire file in bytes */
+
+ char *loop_path; /* kstrdup()ed copy of the path given as first table argument */
+};
+
+#ifdef CONFIG_DM_DEBUG
+/* Debug helper: log one extent as start / len / type / real start; a NULL extent is ignored. */
+static void dump_extent(struct extent *e)
+{
+	if (!e)
+		return;
+
+	if (e->type != DMLOOP_DEV) {
+		DMWARN("unknown extent type in map, skipping.");
+		return;
+	}
+
+	/* Only DMLOOP_DEV reaches this point, hence the fixed 'd'; widen sector_t/u64 for %llu. */
+	DMDEBUG("start: %8llu len: %4llu %4c.rstart: %8llu",
+		(unsigned long long)e->start, (unsigned long long)e->len, 'd',
+		(unsigned long long)e->data);
+}
+
+/* Debug helper: log the map header, then every extent in index order; NULL is ignored. */
+static void dump_extent_map(struct extent_map *map)
+{
+	unsigned idx = 0;
+
+	if (map) {
+		DMDEBUG("extent map (nr_extents = %d, cur_extent = %d)",
+			map->nr_extents, map->cur_extent);
+
+		while (idx < map->nr_extents)
+			dump_extent(&map->extents[idx++]);
+	}
+}
+
+#else /* CONFIG_DM_DEBUG */
+#define dump_extent_map(a)
+#endif /* CONFIG_DM_DEBUG */
+
+/*
+ * Shrink a populated extent map to the size its nr_extents requires by
+ * copying it into a fresh allocation. The original map is always freed,
+ * on failure as well as on success. Returns the new map, or NULL if map
+ * was NULL or the allocation failed.
+ */
+static struct extent_map *finalize_map(struct extent_map *map)
+{
+	struct extent_map *trimmed;
+
+	if (!map)
+		return NULL;
+
+	trimmed = kmalloc(DMLOOP_MAP_SIZE(map->nr_extents), GFP_KERNEL);
+	DMDEBUG("attempted to re-allocate extent map and header to %u bytes",
+		DMLOOP_MAP_SIZE(map->nr_extents));
+	if (trimmed)
+		memcpy(trimmed, map, DMLOOP_MAP_SIZE(map->nr_extents));
+	else
+		DMERR("Could not re-allocate final extent map");
+	kfree(map);
+	return trimmed;
+}
+
+/* Append an extent to map->extents[]; both need locals `map` and `nr_extents`. No trailing ';': the caller supplies it. */
+#define _ADD_EXTENT(s, l, t) \
+do { \
+	DMLOOP_EXTENT(nr_extents)->start = (s); \
+	DMLOOP_EXTENT(nr_extents)->len = (l); \
+	DMLOOP_EXTENT(nr_extents)->type = (t); \
+	nr_extents++; \
+} while (0)
+#define ADD_DEV_EXTENT(s, l, r) \
+do { \
+	map->extents[nr_extents].data = (u64)(r); \
+	_ADD_EXTENT(s, l, DMLOOP_DEV); \
+} while (0)
+
+/*
+ * Build lc->map by walking the backing file with bmap(), one filesystem
+ * block at a time from the block containing lc->offset, and merging
+ * physically consecutive blocks into DMLOOP_DEV extents.
+ *
+ * Returns 0 on success, -ENOMEM if a map allocation fails, or -EINVAL if
+ * the file is not on a block-device filesystem, has holes, holds no
+ * complete block, or needs more than LOOP_MAX_EXTENTS extents.
+ */
+static int setup_loop_extents(struct loop_c *lc)
+{
+	struct extent_map *map;
+	struct inode *inode;
+	unsigned blkbits;
+	unsigned shiftbits;
+	sector_t probe_block;
+	sector_t last_block;
+	sector_t start = 0;
+	int nr_extents = 0;
+
+	map = kzalloc(DMLOOP_MAP_SIZE(LOOP_MAX_EXTENTS), GFP_KERNEL);
+	if (!map) {
+		DMERR("Could not allocate initial extent map");
+		return -ENOMEM;
+	}
+
+	DMDEBUG("Allocated initial extent map of %u bytes, %d entries.",
+		(unsigned)DMLOOP_MAP_SIZE(LOOP_MAX_EXTENTS), LOOP_MAX_EXTENTS);
+	inode = lc->filp->f_mapping->host;
+	/* FIXME Check if this is possible */
+	if (!inode)
+		goto out_free;
+
+	if (!inode->i_sb || !inode->i_sb->s_bdev) {
+		strcpy(lc->name, "none");
+		DMERR("Non-block-device-based filesystems are not supported");
+		goto out_free;
+	}
+
+	lc->bdev = inode->i_sb->s_bdev;
+	bdevname(lc->bdev, &lc->name[0]);
+	DMDEBUG("setting real device to %s", lc->name);
+
+	blkbits = inode->i_blkbits;
+	probe_block = lc->offset >> blkbits;
+	shiftbits = blkbits - SECTOR_SHIFT;
+	last_block = lc->size >> blkbits;
+
+	DMDEBUG("using: blkbits=%u, probe_block=%llu, "
+		"sectors_per_block=%u, last_block=%llu",
+		blkbits, (unsigned long long)probe_block, 1 << shiftbits,
+		(unsigned long long)last_block);
+
+	/* Each pass of the outer loop discovers and records exactly one extent. */
+	while (probe_block < last_block && nr_extents < LOOP_MAX_EXTENTS) {
+		sector_t first_block = bmap(inode, probe_block);
+		sector_t nr_blocks = 1;
+
+		/* A zero result from bmap() is treated as a hole in the file. */
+		if (!first_block)
+			goto bad_bmap;
+		/* Extend the run while each following block is physically adjacent. */
+		for (probe_block++; probe_block < last_block;
+		     probe_block++, nr_blocks++) {
+			sector_t cur_block = bmap(inode, probe_block);
+
+			if (!cur_block)
+				goto bad_bmap;
+			if (cur_block != first_block + nr_blocks)
+				break;	/* discontiguity: probe_block starts the next extent */
+		}
+		DMDEBUG("adding device extent %d (%llu/%llu/%llu)", nr_extents,
+			(unsigned long long)start,
+			(unsigned long long)(nr_blocks << shiftbits),
+			(unsigned long long)(first_block << shiftbits));
+		ADD_DEV_EXTENT(start, nr_blocks << shiftbits, (first_block << shiftbits));
+		start = (probe_block - (lc->offset >> blkbits)) << shiftbits;
+	}
+
+	/* Refuse a partial or empty map: unmapped sectors would only surface later as I/O errors. */
+	if (probe_block < last_block) {
+		DMERR("Loopfile needs more than %d extents", LOOP_MAX_EXTENTS);
+		goto out_free;
+	}
+	if (!nr_extents) {
+		DMERR("Loopfile has no complete filesystem block to map");
+		goto out_free;
+	}
+	map->nr_extents = nr_extents;
+	map->cur_extent = 0;
+
+	/* finalize_map() frees the oversized map even when it fails. */
+	map = finalize_map(map);
+	if (!map)
+		return -ENOMEM;
+	DMINFO("Finalized extent map of %u bytes, %d entries.",
+	       (unsigned)(map->nr_extents * sizeof(struct extent)), map->nr_extents);
+	dump_extent_map(map);
+	lc->blkbits = blkbits;
+	lc->map = map;
+	return 0;
+
+bad_bmap:
+	DMERR("Loopfile has holes");
+out_free:
+	kfree(map);
+	return -EINVAL;
+}
+
+static int contains_sector(struct extent *e, sector_t s)
+{
+	return (e->start <= s) && (s < e->start + e->len);	/* half-open range [start, start + len) */
+}
+
+/*
+ * For now this just tries to work. There is lots of scope for improving
+ * performance later, once the behaviour is better understood.
+*/
+static struct extent *find_extent(struct extent_map *map, sector_t s)
+{
+	struct extent *hit = &map->extents[map->cur_extent];
+	unsigned idx;
+
+	if (contains_sector(hit, s))	/* fast path: same extent as the previous lookup */
+		return hit;
+	for (idx = 0; idx < map->nr_extents; idx++) {	/* otherwise scan linearly from the start */
+		hit = &map->extents[idx];
+		if (contains_sector(hit, s)) {
+			map->cur_extent = idx;	/* remember the hit for the next lookup */
+			return hit;
+		}
+	}
+	return NULL;	/* s lies outside every extent */
+}
+
+/* bmap debugging support: macros for do_remap_dev_bio(), using its ti, bio and e; BMAP_DEBUG may return -EIO from it */
+#ifdef CONFIG_DM_DEBUG
+#define CACHE_OLD_SECTOR sector_t old_bi_sector = bio->bi_sector
+static int bmap_debug;	/* int to match module_param(bmap_debug, int, 0) */
+#define BMAP_DEBUG \
+do { \
+	/* temporary cross-check of split_io: the bio's logical range must end inside e */ \
+	if (old_bi_sector - ti->begin + bio_sectors(bio) > e->start + e->len) { \
+		DMDEBUG("WARNING: bio doesn't fit in extent"); \
+		return -EIO; \
+	} \
+	if (bmap_debug) \
+		DMDEBUG("mapping %u logical sectors starting %llu " \
+			"to dev extent at real sector %llu", bio_sectors(bio), \
+			(unsigned long long)old_bi_sector, (unsigned long long)bio->bi_sector); \
+} while (0)
+#else
+#define CACHE_OLD_SECTOR
+#define BMAP_DEBUG
+#endif /* CONFIG_DM_DEBUG */
+
+/*
+ * Redirect bio to the backing device: keep its offset within extent e and
+ * rebase it onto the extent's real start sector. Returns 1 (or -EIO from BMAP_DEBUG).
+ */
+static int do_remap_dev_bio(struct dm_target *ti, struct bio *bio, struct extent *e)
+{
+	struct loop_c *lc = ti->private;
+	sector_t delta = bio->bi_sector - (e->start + ti->begin);
+	CACHE_OLD_SECTOR;
+
+	bio->bi_bdev = lc->bdev;
+	bio->bi_sector = (sector_t)e->data + delta;
+	BMAP_DEBUG;
+
+	return 1;
+}
+
+/*
+ * Map callback: barrier bios get -EOPNOTSUPP, a sector covered by no extent
+ * gets -EIO, anything else is handed to do_remap_dev_bio().
+ */
+static int loop_map(struct dm_target *ti, struct bio *bio,
+		    union map_info *context)
+{
+	struct loop_c *lc = ti->private;
+	struct extent *e;
+
+	if (bio_barrier(bio))
+		return -EOPNOTSUPP;
+
+	e = find_extent(lc->map, bio->bi_sector - ti->begin);
+	if (!e) {
+		DMERR("Error: sector %llu in device, but no matching "
+		      "extent found.", (unsigned long long)bio->bi_sector);
+		return -EIO;
+	}
+
+	if (DMLOOP_EXTENT_TYPE(e) == DMLOOP_DEV)
+		return do_remap_dev_bio(ti, bio, e);
+
+	DMERR("Illegal extent type %d at offset 0x%x",
+	      DMLOOP_EXTENT_TYPE(e), (unsigned)(e - lc->map->extents));
+	BUG();
+	return -EIO;
+}
+
+/*
+ * This needs some thought on handling unlinked backing files. Some parts of
+ * the kernel return a cached name (now invalid), while others return a dcache
+ * "/path/to/foo (deleted)" name (never was/is valid). Which is better is
+ * debatable.
+ *
+ * On the one hand, using a cached name gives table output which is directly
+ * usable assuming the user re-creates the unlinked image file, on the other
+ * it is more consistent with e.g. swap to use the dcache name.
+*/
+/* Status callback: INFO is empty; TABLE is "<loop_path> <offset>", the two arguments loop_ctr() parses. */
+static int loop_status(struct dm_target *ti, status_type_t type,
+		       char *result, unsigned maxlen)
+{
+	struct loop_c *lc = ti->private;
+
+	switch (type) {
+	case STATUSTYPE_INFO:
+		result[0] = '\0';	/* nothing to report, but never leave stale buffer contents */
+		break;
+	case STATUSTYPE_TABLE:
+		snprintf(result, maxlen, "%s %lld", lc->loop_path, (long long)lc->offset);
+		break;
+	}
+	return 0;
+}
+
+static int loop_invalidate_file(struct file *filp)
+{
+ return invalidate_inode_pages(filp->f_mapping); /* whole mapping; the helper's result is passed straight back */
+}
+
+/*
+ * This should map start/end to pgoff_t and use
+ * invalidate_inode_pages_range.
+ * For now we toss out the whole lot.
+static int loop_invalidate_file_range(struct file *filp,
+ loff_t start, loff_t end)
+{
+ start = start; end = end;
+ return loop_invalidate_file(filp);
+}
+*/
+
+/* Release a file obtained from loop_get_file(): clear the S_SWAPFILE marker,
+ * then close the file. A NULL filp is ignored. */
+static void loop_put_file(struct file *filp)
+{
+	if (filp) {
+		struct inode *host = filp->f_mapping->host;
+
+		/* i_flags is only modified with i_mutex held */
+		mutex_lock(&host->i_mutex);
+		host->i_flags &= ~S_SWAPFILE;
+		mutex_unlock(&host->i_mutex);
+
+		filp_close(filp, NULL);
+	}
+}
+
+/*
+ * Open loop_path (O_DIRECT | O_LARGEFILE; read-only if *flags has
+ * DMLOOP_READONLY) and claim it by setting S_SWAPFILE on its inode.
+ * Returns the file, or an ERR_PTR() if it cannot be opened, is not a regular
+ * file, is mapped writable into userspace or is already claimed.
+ */
+static struct file *loop_get_file(char *loop_path, unsigned *flags)
+{
+	struct file *filp;
+	struct inode *inode;
+	int r;
+
+	filp = filp_open(loop_path,
+			 ((*flags & DMLOOP_READONLY) ? O_RDONLY : O_RDWR) |
+			 O_DIRECT | O_LARGEFILE, 0);
+	if (IS_ERR(filp))
+		return filp;
+
+	inode = filp->f_mapping->host;
+	if (!S_ISREG(inode->i_mode)) {
+		DMERR("file is not a regular file: %s", loop_path);
+		r = -EINVAL;
+		goto out;
+	}
+	if (mapping_writably_mapped(filp->f_mapping)) {
+		DMERR("file is mapped into userspace for writing: %s", loop_path);
+		r = -EBUSY;
+		goto out;
+	}
+	if (mapping_mapped(filp->f_mapping))
+		DMWARN("file is mapped into userspace: %s", loop_path);
+	/*
+	 * S_SWAPFILE is overloaded for its no-truncate semantics. Test and set
+	 * it under i_mutex so two opens cannot both claim the same file.
+	 */
+	mutex_lock(&inode->i_mutex);
+	if (IS_SWAPFILE(inode)) {
+		mutex_unlock(&inode->i_mutex);
+		DMERR("file is already in use: %s", loop_path);
+		r = -EBUSY;	/* this path must set r before ERR_PTR(r) */
+		goto out;
+	}
+	inode->i_flags |= S_SWAPFILE;
+	mutex_unlock(&inode->i_mutex);
+	return filp;
+out:
+	fput(filp);
+	return ERR_PTR(r);
+}
+
+/*
+ * Record the backing file's size and block size in lc and validate the mapping
+ * against them. Returns 0, or -EINVAL with *estr set to a description.
+ */
+static int loop_setup_size(struct loop_c *lc, struct dm_target *ti, char **estr)
+{
+	struct inode *inode = lc->filp->f_mapping->host;
+
+	lc->size = i_size_read(inode);
+	lc->blkbits = inode->i_blkbits;
+	/* '-' binds tighter than '<<', so the low-bits mask needs its own parentheses */
+	if (lc->offset & ((1 << lc->blkbits) - 1)) {
+		DMERR("Backing file offset of %lld bytes not a multiple of "
+		      "filesystem blocksize (%d)", (long long)lc->offset, 1 << lc->blkbits);
+		*estr = "Loop file offset must be a multiple of fs blocksize";
+		return -EINVAL;
+	}
+	if (!lc->size) {
+		*estr = "Backing file is empty";
+		return -EINVAL;
+	}
+	if (lc->size < to_bytes(1)) {
+		*estr = "Backing file cannot be less than one sector in size";
+		return -EINVAL;
+	}
+	/* Checked separately: a negative difference below may be compared as unsigned. */
+	if (lc->offset < 0 || lc->offset > lc->size) {
+		*estr = "Loop file offset must lie within the backing file";
+		return -EINVAL;
+	}
+	lc->sectors = to_sector(lc->size);
+	if (to_bytes(lc->sectors) < lc->size)
+		DMWARN("Not using %llu bytes in incomplete block at EOF",
+		       (unsigned long long)(lc->size - to_bytes(lc->sectors)));
+	if (lc->size - lc->offset < to_bytes(ti->len)) {
+		*estr = "Mapped region cannot be smaller than target size";
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Installed as both .presuspend and .flush: invalidate the backing file's pages. */
+void loop_flush(struct dm_target *ti)
+{
+	struct loop_c *ctx = ti->private;
+	loop_invalidate_file(ctx->filp);
+}
+
+/* Destructor: release the backing file and free everything loop_ctr() allocated. */
+static void loop_dtr(struct dm_target *ti)
+{
+	struct loop_c *lc = ti->private;
+
+	/* Read-only targets skip the invalidation. */
+	if (!(lc->flags & DMLOOP_READONLY))
+		loop_invalidate_file(lc->filp);
+
+	loop_put_file(lc->filp);
+	DMINFO("Released file %s", lc->loop_path);
+
+	kfree(lc->map);		/* kfree(NULL) is a no-op, no guard needed */
+	kfree(lc->loop_path);	/* kstrdup()ed in loop_ctr(); must be freed with the context */
+	kfree(lc);
+}
+
+/*
+ * Construct a loopback mapping: <loop_path> <offset>
+ * Returns 0 with ti->private set to the new context, or a negative errno
+ * with ti->error describing the failure.
+ */
+static int loop_ctr(struct dm_target *ti, unsigned argc, char **argv)
+{
+	struct loop_c *lc;
+	int r;
+
+	if (argc != 2) {
+		ti->error = "Invalid argument count";
+		DMDEBUG("Invalid argument count");
+		return -EINVAL;
+	}
+
+	lc = kzalloc(sizeof(*lc), GFP_KERNEL);
+	if (!lc) {
+		ti->error = "Cannot allocate loop context";
+		return -ENOMEM;
+	}
+
+	lc->loop_path = kstrdup(argv[0], GFP_KERNEL);
+	if (!lc->loop_path) {
+		ti->error = "Cannot allocate loop path";
+		r = -ENOMEM;
+		goto out;
+	}
+
+	if (sscanf(argv[1], "%lld", &lc->offset) != 1 || lc->offset < 0) {
+		ti->error = "Invalid file offset";
+		r = -EINVAL;
+		goto out;
+	}
+
+	if (!(dm_table_get_mode(ti->table) & FMODE_WRITE))
+		lc->flags |= DMLOOP_READONLY;
+
+	lc->filp = loop_get_file(lc->loop_path, &lc->flags);
+	if (IS_ERR(lc->filp)) {
+		ti->error = "Bad loop backing file";
+		r = PTR_ERR(lc->filp);
+		goto out;
+	}
+
+	r = loop_setup_size(lc, ti, &ti->error);
+	if (r)
+		goto out_putf;
+	r = setup_loop_extents(lc);
+	if (r) {
+		ti->error = "Could not create extent map";
+		goto out_putf;
+	}
+
+	/* Split I/O at block boundaries */
+	ti->split_io = 1 << (lc->blkbits - SECTOR_SHIFT);
+	DMDEBUG("Splitting io at %llu sector boundaries",
+		(unsigned long long)ti->split_io);
+	if (lc->bdev)
+		dm_set_device_limits(ti, lc->bdev);
+	DMDEBUG("Constructed loop target to %s on real device %s "
+		"(%lldk, %llu sectors)", lc->loop_path, lc->name,
+		(long long)(lc->size >> 10), (unsigned long long)lc->sectors);
+	ti->private = lc;
+	return 0;
+
+out_putf:
+	loop_put_file(lc->filp);
+out:
+	kfree(lc->loop_path);	/* NULL if kstrdup() itself failed; kfree(NULL) is a no-op */
+	kfree(lc);
+	return r;
+}
+
+static struct target_type loop_target = { /* registered by dm_loop_init(), unregistered by dm_loop_exit() */
+ .name = "loop",
+ .version = {0, 0, 1},
+ .module = THIS_MODULE,
+ .ctr = loop_ctr,
+ .dtr = loop_dtr,
+ .map = loop_map,
+ .presuspend = loop_flush, /* same handler as .flush: invalidates the backing file's pages */
+ .flush = loop_flush,
+ .status = loop_status,
+};
+
+/*
+ * Module entry point: register the "loop" target with device-mapper.
+ *
+ * Returns 0 on success. A negative result from dm_register_target() is
+ * logged and handed back to the caller unchanged.
+ */
+int __init dm_loop_init(void)
+{
+	int r;
+
+	r = dm_register_target(&loop_target);
+	if (r >= 0) {
+		DMINFO("Loop target registered");
+		return 0;
+	}
+
+	DMERR("Register failed %d", r);
+	return r;
+}
+
+/* Module exit point: unregister the "loop" target, logging any failure. */
+void dm_loop_exit(void)
+{
+	int r = dm_unregister_target(&loop_target);
+
+	if (r >= 0)
+		return;
+	DMERR("Target unregister failed %d", r);
+}
+
+module_init(dm_loop_init); /* hooks up the init function defined above */
+module_exit(dm_loop_exit); /* hooks up the exit function defined above */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Bryn Reeves <breeves@redhat.com>");
+MODULE_DESCRIPTION("device-mapper loop target");
+
+#ifdef CONFIG_DM_DEBUG
+module_param(bmap_debug, int, 0); /* gates the per-bio message in BMAP_DEBUG */
+MODULE_PARM_DESC(bmap_debug, "enable bmap debugging output (VERY noisy).");
+#endif /* CONFIG_DM_DEBUG */