diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt index 78db38d02bc9..875b2b56b87f 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.txt @@ -243,5 +243,29 @@ to backing storage rather than keeping it in memory. User should set up backing device via /sys/block/zramX/backing_dev before disksize setting. += memory tracking + +With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the +zram block. It could be useful to catch cold or incompressible +pages of the process with*pagemap. +If you enable the feature, you could see block state via +/sys/kernel/debug/zram/zram0/block_state". The output is as follows, + + 300 75.033841 .wh + 301 63.806904 s.. + 302 63.806919 ..h + +First column is zram's block index. +Second column is access time since the system was booted +Third column is state of the block. +(s: same page +w: written page to backing store +h: huge page) + +First line of above example says 300th block is accessed at 75.033841sec +and the block's state is huge so it is written back to the backing +storage. It's a debugging feature so anyone shouldn't rely on it to work +properly. + Nitin Gupta ngupta@vflare.org diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index ac3a31d433b2..635235759a0a 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -13,7 +13,7 @@ config ZRAM It has several use cases, for example: /tmp storage, use as swap disks and maybe many more. - See zram.txt for more information. + See Documentation/blockdev/zram.txt for more information. config ZRAM_WRITEBACK bool "Write back incompressible page to backing device" @@ -25,4 +25,14 @@ config ZRAM_WRITEBACK For this feature, admin should set up backing device via /sys/block/zramX/backing_dev. - See zram.txt for more infomration. + See Documentation/blockdev/zram.txt for more information. + +config ZRAM_MEMORY_TRACKING + bool "Track zRam block status" + depends on ZRAM && DEBUG_FS + help + With this feature, admin can track the state of allocated blocks + of zRAM. Admin could see the information via + /sys/kernel/debug/zram/zramX/block_state. + + See Documentation/blockdev/zram.txt for more information. diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 7fc10e2ad734..da51293e7c03 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "zram_drv.h" @@ -67,6 +68,13 @@ static inline bool init_done(struct zram *zram) return zram->disksize; } +static inline bool zram_allocated(struct zram *zram, u32 index) +{ + + return (zram->table[index].value >> (ZRAM_FLAG_SHIFT + 1)) || + zram->table[index].handle; +} + static inline struct zram *dev_to_zram(struct device *dev) { return (struct zram *)dev_to_disk(dev)->private_data; @@ -83,7 +91,7 @@ static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) } /* flag operations require table entry bit_spin_lock() being held */ -static int zram_test_flag(struct zram *zram, u32 index, +static bool zram_test_flag(struct zram *zram, u32 index, enum zram_pageflags flag) { return zram->table[index].value & BIT(flag); @@ -107,16 +115,6 @@ static inline void zram_set_element(struct zram *zram, u32 index, zram->table[index].element = element; } -static void zram_accessed(struct zram *zram, u32 index) -{ - zram->table[index].ac_time = sched_clock(); -} - -static void zram_reset_access(struct zram *zram, u32 index) -{ - zram->table[index].ac_time = 0; -} - static unsigned long zram_get_element(struct zram *zram, u32 index) { return zram->table[index].element; @@ -620,6 +618,114 @@ static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, static void zram_wb_clear(struct zram *zram, u32 index) {} #endif +#ifdef CONFIG_ZRAM_MEMORY_TRACKING + +static struct dentry *zram_debugfs_root; + +static void zram_debugfs_create(void) +{ + zram_debugfs_root = debugfs_create_dir("zram", NULL); +} + +static void zram_debugfs_destroy(void) +{ + debugfs_remove_recursive(zram_debugfs_root); +} + +static void zram_accessed(struct zram *zram, u32 index) +{ + zram->table[index].ac_time = ktime_get_boottime(); +} + +static void zram_reset_access(struct zram *zram, u32 index) +{ + zram->table[index].ac_time = 0; +} + +static ssize_t read_block_state(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + char *kbuf; + ssize_t index, written = 0; + struct zram *zram = file->private_data; + unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; + struct timespec64 ts; + + kbuf = kvmalloc(count, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + down_read(&zram->init_lock); + if (!init_done(zram)) { + up_read(&zram->init_lock); + kvfree(kbuf); + return -EINVAL; + } + + for (index = *ppos; index < nr_pages; index++) { + int copied; + + zram_slot_lock(zram, index); + if (!zram_allocated(zram, index)) + goto next; + + ts = ktime_to_timespec64(zram->table[index].ac_time); + copied = snprintf(kbuf + written, count, + "%12zd %12lld.%06lu %c%c%c\n", + index, (s64)ts.tv_sec, + ts.tv_nsec / NSEC_PER_USEC, + zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.', + zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.', + zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.'); + + if (count < copied) { + zram_slot_unlock(zram, index); + break; + } + written += copied; + count -= copied; +next: + zram_slot_unlock(zram, index); + *ppos += 1; + } + + up_read(&zram->init_lock); + if (copy_to_user(buf, kbuf, written)) + written = -EFAULT; + kvfree(kbuf); + + return written; +} + +static const struct file_operations proc_zram_block_state_op = { + .open = simple_open, + .read = read_block_state, + .llseek = default_llseek, +}; + +static void zram_debugfs_register(struct zram *zram) +{ + if (!zram_debugfs_root) + return; + + zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name, + zram_debugfs_root); + debugfs_create_file("block_state", 0400, zram->debugfs_dir, + zram, &proc_zram_block_state_op); +} + +static void zram_debugfs_unregister(struct zram *zram) +{ + debugfs_remove_recursive(zram->debugfs_dir); +} +#else +static void zram_debugfs_create(void) {}; +static void zram_debugfs_destroy(void) {}; +static void zram_accessed(struct zram *zram, u32 index) {}; +static void zram_reset_access(struct zram *zram, u32 index) {}; +static void zram_debugfs_register(struct zram *zram) {}; +static void zram_debugfs_unregister(struct zram *zram) {}; +#endif /* * We switched to per-cpu streams and this attr is not needed anymore. @@ -1604,6 +1710,7 @@ static int zram_add(void) } strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); + zram_debugfs_register(zram); pr_info("Added device: %s\n", zram->disk->disk_name); return device_id; @@ -1637,6 +1744,7 @@ static int zram_remove(struct zram *zram) zram->claim = true; mutex_unlock(&bdev->bd_mutex); + zram_debugfs_unregister(zram); /* * Remove sysfs first, so no one will perform a disksize * store while we destroy the devices. This also helps during @@ -1739,6 +1847,7 @@ static void destroy_devices(void) { class_unregister(&zram_control_class); idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); + zram_debugfs_destroy(); idr_destroy(&zram_index_idr); unregister_blkdev(zram_major, "zram"); cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); @@ -1760,6 +1869,7 @@ static int __init zram_init(void) return ret; } + zram_debugfs_create(); zram_major = register_blkdev(0, "zram"); if (zram_major <= 0) { pr_err("Unable to get major number\n"); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 1075218e88b2..72c8584b6dff 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -61,7 +61,9 @@ struct zram_table_entry { unsigned long element; }; unsigned long value; - u64 ac_time; +#ifdef CONFIG_ZRAM_MEMORY_TRACKING + ktime_t ac_time; +#endif }; struct zram_stats { @@ -110,5 +112,8 @@ struct zram { unsigned long nr_pages; spinlock_t bitmap_lock; #endif +#ifdef CONFIG_ZRAM_MEMORY_TRACKING + struct dentry *debugfs_dir; +#endif }; #endif