From caa65070d5156e5dfa3cc0a6ffa2658d7454edb2 Mon Sep 17 00:00:00 2001 From: Carlos Ayrton Lopez Arroyo <15030201@itcelaya.edu.mx> Date: Tue, 31 May 2022 22:18:10 -0500 Subject: [PATCH 01/44] Revert "zram_drv: allow overriding zram size from kernel" This reverts commit 3844a5d9f7038fc5ba75c7a3cd1e070d46899968. Users want userspace to be able to modify this value, for unjustified reasons but who cares anyway, default value will be set by post_boot service if exists --- drivers/block/zram/Kconfig | 5 ----- drivers/block/zram/zram_drv.c | 5 ----- 2 files changed, 10 deletions(-) diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index f06cd83cb067..bf64a66816a2 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -53,8 +53,3 @@ config ZRAM_MEMORY_TRACKING /sys/kernel/debug/zram/zramX/block_state. See Documentation/blockdev/zram.txt for more information. - -config ZRAM_SIZE_OVERRIDE - int "zram size to set from kernel" - range 1 8 - default 2 diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a4f347f5c08b..097176fbd1b6 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1839,14 +1839,9 @@ static ssize_t disksize_store(struct device *dev, struct zram *zram = dev_to_zram(dev); int err; -#ifndef CONFIG_ZRAM_SIZE_OVERRIDE disksize = memparse(buf, NULL); if (!disksize) return -EINVAL; -#else - disksize = (u64)SZ_1G * CONFIG_ZRAM_SIZE_OVERRIDE; - pr_info("Overriding zram size to %li", disksize); -#endif down_write(&zram->init_lock); if (init_done(zram)) { From 609fc8fa9a04f5e99fb305b6ac9b4d6f38858304 Mon Sep 17 00:00:00 2001 From: UtsavBalar1231 Date: Fri, 16 Jul 2021 17:51:45 +0200 Subject: [PATCH 02/44] input: aw8697_haptic: Disable Debugging Change-Id: I463a78cecfb19c50f76ffa747a072aac3bb4ff77 Signed-off-by: UtsavBalar1231 --- drivers/input/misc/aw8697_haptic/aw8697.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/input/misc/aw8697_haptic/aw8697.c 
b/drivers/input/misc/aw8697_haptic/aw8697.c index 96acac86889a..9fc92bb13ecb 100755 --- a/drivers/input/misc/aw8697_haptic/aw8697.c +++ b/drivers/input/misc/aw8697_haptic/aw8697.c @@ -13,7 +13,6 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. */ -#define DEBUG #include #include #include From 8ca046b8397a3e4defbf6e01d7df191c0b3b712e Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Fri, 28 May 2021 11:40:50 -0600 Subject: [PATCH 03/44] BACKPORT: dfc_qmi: Honor tcp ancillary bit even when there is no change in grant The tcp ancillary bit has been updated to be sent in almost all cases unlike earlier where this was sent in case of TCP DL traffic. This bit will now not be set only in case of adverse scenarios, so this change ensures that the ACK queue state is updated in case the ancillary bit is unset. CRs-Fixed: 2957344 Change-Id: I4e1f26c9d3fabc64401284e36b48d82a4f3a5161 Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: UtsavBalar1231 --- drivers/soc/qcom/dfc_qmi.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/soc/qcom/dfc_qmi.c b/drivers/soc/qcom/dfc_qmi.c index 3cd7a6effc63..178877832000 100644 --- a/drivers/soc/qcom/dfc_qmi.c +++ b/drivers/soc/qcom/dfc_qmi.c @@ -915,12 +915,18 @@ int dfc_bearer_flow_ctl(struct net_device *dev, enable = bearer->grant_size ? 
true : false; - qmi_rmnet_flow_control(dev, bearer->mq_idx, enable); + /* Do not flow disable tcp ack q in tcp bidir + * ACK queue opened first to drain ACKs faster + * Although since tcp ancillary is true most of the time, + * this shouldn't really make a difference + * If there is non zero grant but tcp ancillary is false, + * send out ACKs anyway + */ + if (bearer->ack_mq_idx != INVALID_MQ) + qmi_rmnet_flow_control(dev, bearer->ack_mq_idx, + enable || bearer->tcp_bidir); - /* Do not flow disable tcp ack q in tcp bidir */ - if (bearer->ack_mq_idx != INVALID_MQ && - (enable || !bearer->tcp_bidir)) - qmi_rmnet_flow_control(dev, bearer->ack_mq_idx, enable); + qmi_rmnet_flow_control(dev, bearer->mq_idx, enable); if (!enable && bearer->ack_req) dfc_send_ack(dev, bearer->bearer_id, @@ -1015,7 +1021,8 @@ static int dfc_update_fc_map(struct net_device *dev, struct qos_info *qos, } if ((itm->grant_size == 0 && adjusted_grant > 0) || - (itm->grant_size > 0 && adjusted_grant == 0)) + (itm->grant_size > 0 && adjusted_grant == 0) || + (itm->tcp_bidir ^ DFC_IS_TCP_BIDIR(ancillary))) action = true; /* This is needed by qmap */ From 1c2a6f841cbb30eb1086f2ff83b6a17c280869cb Mon Sep 17 00:00:00 2001 From: Weiyi Chen Date: Mon, 4 Oct 2021 10:33:27 -0700 Subject: [PATCH 04/44] BACKPORT: dfc: not caching grant for removed bearers If a grant indication for a bearer has "removed" flag set, caching the bearer could lead to data stall for the next data call because modem will not send grant for this bearer during powersave. Do not cache grant for the removed or disabled bearers. Also make sure a bearer is flow enabled when it is associated with a tx queue. 
Change-Id: I7eca597e3cc7d5a0bfe523201c454fb45e66a3a0 Signed-off-by: Weiyi Chen Signed-off-by: UtsavBalar1231 --- drivers/soc/qcom/dfc_qmi.c | 5 ++++- drivers/soc/qcom/qmi_rmnet.c | 13 ++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/soc/qcom/dfc_qmi.c b/drivers/soc/qcom/dfc_qmi.c index 178877832000..422c225f67c1 100644 --- a/drivers/soc/qcom/dfc_qmi.c +++ b/drivers/soc/qcom/dfc_qmi.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2021 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -995,7 +996,9 @@ static int dfc_update_fc_map(struct net_device *dev, struct qos_info *qos, u32 adjusted_grant; itm = qmi_rmnet_get_bearer_map(qos, fc_info->bearer_id); - if (!itm) + + /* cache the bearer assuming it is a new bearer */ + if (unlikely(!itm && !is_query && fc_info->num_bytes)) itm = qmi_rmnet_get_bearer_noref(qos, fc_info->bearer_id); if (itm) { diff --git a/drivers/soc/qcom/qmi_rmnet.c b/drivers/soc/qcom/qmi_rmnet.c index 61e15e96023a..060ce5d45ec3 100644 --- a/drivers/soc/qcom/qmi_rmnet.c +++ b/drivers/soc/qcom/qmi_rmnet.c @@ -393,12 +393,15 @@ static void __qmi_rmnet_update_mq(struct net_device *dev, bearer->mq_idx = itm->mq_idx; } - qmi_rmnet_flow_control(dev, itm->mq_idx, - bearer->grant_size > 0 ? 1 : 0); - + /* Always enable flow for the newly associated bearer */ + if (!bearer->grant_size) { + bearer->grant_size = DEFAULT_GRANT; + bearer->grant_thresh = + qmi_rmnet_grant_per(DEFAULT_GRANT); + } + qmi_rmnet_flow_control(dev, itm->mq_idx, 1); if (dfc_mode == DFC_MODE_SA) - qmi_rmnet_flow_control(dev, bearer->ack_mq_idx, - bearer->grant_size > 0 ? 
1 : 0); + qmi_rmnet_flow_control(dev, bearer->ack_mq_idx, 1); } } From c94487d7cad8330ca0b2f8d6254d9365b392f8ab Mon Sep 17 00:00:00 2001 From: Weiyi Chen Date: Fri, 12 Nov 2021 10:40:11 -0800 Subject: [PATCH 05/44] BACKPORT: dfc: not using alarm timer Not using alarm timer for dfc powersave check and eliminate the need for a wakelock. This allows AP to go to suspend quicker. Change-Id: I7153055d0231a65125ad88808db9e1d0032f24d9 Signed-off-by: Weiyi Chen Signed-off-by: UtsavBalar1231 --- drivers/soc/qcom/qmi_rmnet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/soc/qcom/qmi_rmnet.c b/drivers/soc/qcom/qmi_rmnet.c index 060ce5d45ec3..04b9ec332720 100644 --- a/drivers/soc/qcom/qmi_rmnet.c +++ b/drivers/soc/qcom/qmi_rmnet.c @@ -43,6 +43,7 @@ unsigned int rmnet_wq_frequency __read_mostly = 1000; #define PS_WORK_ACTIVE_BIT 0 #define PS_INTERVAL (((!rmnet_wq_frequency) ? \ 1 : rmnet_wq_frequency/10) * (HZ/100)) +#define PS_INTERVAL_JF (msecs_to_jiffies(PS_INTERVAL)) #define NO_DELAY (0x0000 * HZ) #define WATCHDOG_EXPIRE_JF (msecs_to_jiffies(50)) @@ -1248,7 +1249,7 @@ void qmi_rmnet_work_init(void *port) rmnet_ps_wq = NULL; return; } - INIT_DEFERRABLE_WORK(&rmnet_work->work, qmi_rmnet_check_stats); + INIT_DELAYED_WORK(&rmnet_work->work, qmi_rmnet_check_stats); rmnet_work->port = port; rmnet_get_packets(rmnet_work->port, &rmnet_work->old_rx_pkts, &rmnet_work->old_tx_pkts); From 68e192e5db6673b6607b9c25a3d6696ae5d30e67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=98=BF=E8=8F=8C=E2=80=A2=E6=9C=AA=E9=9C=9C?= <799620521@qq.com> Date: Sat, 12 Feb 2022 20:22:12 +0800 Subject: [PATCH 06/44] power: supply: qcom: Show true percentage capacity to userplace LMI, APOLLO devices do not use bq27z561 fuel gauge, so commit 979afd2cea8cb78b731d5ed6f8387e8a7d1532f2 does not work on these devices. 
Signed-off-by: UtsavBalar1231 Change-Id: Ib3b7d5a50eac0c3375866bfca54e239fc798b1ae --- drivers/power/supply/qcom/qpnp-fg-gen4.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/power/supply/qcom/qpnp-fg-gen4.c b/drivers/power/supply/qcom/qpnp-fg-gen4.c index ca1579f652c8..a2b392f886b0 100644 --- a/drivers/power/supply/qcom/qpnp-fg-gen4.c +++ b/drivers/power/supply/qcom/qpnp-fg-gen4.c @@ -5064,6 +5064,7 @@ static int fg_psy_get_property(struct power_supply *psy, int64_t temp; int vbatt_uv; int shutdown_voltage; + int capacity_major, capacity_minor; static bool shutdown_delay_cancel; static bool last_shutdown_delay; @@ -5135,15 +5136,13 @@ static int fg_psy_get_property(struct power_supply *psy, break; #endif case POWER_SUPPLY_PROP_CAPACITY: - rc = fg_gen4_get_prop_capacity(fg, &pval->intval); - //Using smooth battery capacity. - if (fg->param.batt_soc >= 0 && !chip->rapid_soc_dec_en && !chip->soc_scale_mode) - pval->intval = fg->param.batt_soc; + rc = fg_gen4_get_prop_capacity_raw(chip, &pval->intval); + capacity_major = pval->intval / 100; + capacity_minor = pval->intval % 100; + if (capacity_minor >= 50) + capacity_major++; - if (chip->dt.fg_increase_100soc_time) { - if (fg->param.smooth_batt_soc >= 0 && !chip->rapid_soc_dec_en && !chip->soc_scale_mode) - pval->intval = fg->param.smooth_batt_soc; - } + pval->intval = capacity_major; //shutdown delay feature if (chip->dt.shutdown_delay_enable) { From 61d8ec3e3e95903c2ea8b52e025c6d02f31eb577 Mon Sep 17 00:00:00 2001 From: Danny Lin Date: Sat, 24 Aug 2019 16:08:27 -0700 Subject: [PATCH 07/44] power: qpnp-fg-gen4: Always read MSOC for hi-res raw capacity The PMIC provides accurate MSOC data regardless of whether a charger is connected, so there's no reason to return a low-precision BSOC value when running on battery. 
Test: hi-res values continue to be reported when the device is unplugged and hi-res reporting is enabled in DT Signed-off-by: Danny Lin Signed-off-by: UtsavBalar1231 Change-Id: I884cda3aad6a648b0323220e21389180b28822e6 --- drivers/power/supply/qcom/qpnp-fg-gen4.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/power/supply/qcom/qpnp-fg-gen4.c b/drivers/power/supply/qcom/qpnp-fg-gen4.c index a2b392f886b0..65f30188b654 100644 --- a/drivers/power/supply/qcom/qpnp-fg-gen4.c +++ b/drivers/power/supply/qcom/qpnp-fg-gen4.c @@ -1070,13 +1070,6 @@ static int fg_gen4_get_prop_capacity_raw(struct fg_gen4_chip *chip, int *val) return rc; } - if (!is_input_present(fg)) { - rc = fg_gen4_get_prop_capacity(fg, val); - if (!rc) - *val = *val * 100; - return rc; - } - rc = fg_get_sram_prop(&chip->fg, FG_SRAM_MONOTONIC_SOC, val); if (rc < 0) { pr_err("Error in getting MONOTONIC_SOC, rc=%d\n", rc); From d46f9ab223a2a1f8a9fe087dbbe8691bee72ddb9 Mon Sep 17 00:00:00 2001 From: Carlos Ayrton Lopez Arroyo <15030201@itcelaya.edu.mx> Date: Tue, 31 May 2022 22:35:41 -0500 Subject: [PATCH 08/44] drivers: thermal: cleanup thermal implementation Also add newest nodes from latest MiCode release --- drivers/cpufreq/cpufreq.c | 4 - drivers/cpufreq/cpufreq_times.c | 329 ------------------------------- drivers/thermal/cpu_cooling.c | 56 ++---- drivers/thermal/qcom/bcl_pmic5.c | 16 -- drivers/thermal/thermal_core.c | 148 ++++++++++---- 5 files changed, 129 insertions(+), 424 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e26faa714aa5..245b4366f294 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2283,10 +2283,6 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_ADJUST, new_policy); - /* adjust if necessary - hardware incompatibility */ - blocking_notifier_call_chain(&cpufreq_policy_notifier_list, - CPUFREQ_THERMAL, new_policy); - /* * verify 
the cpu speed can be set within this limit, which might be * different to the first one diff --git a/drivers/cpufreq/cpufreq_times.c b/drivers/cpufreq/cpufreq_times.c index 1e0b21e46860..210742e963e0 100644 --- a/drivers/cpufreq/cpufreq_times.c +++ b/drivers/cpufreq/cpufreq_times.c @@ -25,19 +25,12 @@ #include #include -#ifndef SYSTEM_UID -#define SYSTEM_UID 1000 -#endif -#define MAX_TASK_COMM_LEN 16 - #define UID_HASH_BITS 10 static DECLARE_HASHTABLE(uid_hash_table, UID_HASH_BITS); -static DECLARE_HASHTABLE(sys_app_hash_table, UID_HASH_BITS); static DEFINE_SPINLOCK(task_time_in_state_lock); /* task->time_in_state */ static DEFINE_SPINLOCK(uid_lock); /* uid_hash_table */ -static DEFINE_SPINLOCK(pid_lock); /* sys_app_hash_table */ struct concurrent_times { atomic64_t active[NR_CPUS]; @@ -53,17 +46,6 @@ struct uid_entry { u64 time_in_state[0]; }; -struct pid_entry { - u64 hash_code; - char *package; - pid_t pid; - struct concurrent_times *concurrent_times; - struct hlist_node hash; - unsigned int max_state; - u64 time_in_state[0]; - struct rcu_head rcu; -}; - /** * struct cpu_freqs - per-cpu frequency information * @offset: start of these freqs' stats in task time_in_state array @@ -155,143 +137,6 @@ static struct uid_entry *find_or_register_uid_locked(uid_t uid) return uid_entry; } -/* - * simple hash function for a string, - * http://www.cse.yorku.ca/~oz/hash.html - */ -static u64 hash_string(const char *str) -{ - u64 hash = 5381; - int c; - - while ((c = *str++)) - hash = ((hash << 5) + hash) + c; - - return hash; -} - -/* Caller must hold rcu_read_lock() */ -static struct pid_entry *find_pid_entry_rcu(u64 hash_code) -{ - struct pid_entry *pid_entry; - - hash_for_each_possible_rcu(sys_app_hash_table, pid_entry, hash, hash_code) { - if (pid_entry->hash_code == hash_code) - return pid_entry; - } - return NULL; -} - -/* Caller must hold pid lock */ -static struct pid_entry *find_pid_entry_locked(u64 hash_code) -{ - struct pid_entry *pid_entry; - - 
hash_for_each_possible(sys_app_hash_table, pid_entry, hash, hash_code) { - if (pid_entry->hash_code == hash_code) - return pid_entry; - } - return NULL; -} - -/* Caller must hold pid lock */ -static struct pid_entry *find_or_register_pid_locked(u64 hash_code, - const char *package, pid_t pid) -{ - struct pid_entry *pid_entry, *temp; - struct concurrent_times *times; - unsigned int max_state = READ_ONCE(next_offset); - size_t alloc_size = sizeof(*pid_entry) + max_state * - sizeof(pid_entry->time_in_state[0]); - pid_entry = find_pid_entry_locked(hash_code); - if (pid_entry) { - if (pid_entry->max_state == max_state) - return pid_entry; - temp = __krealloc(pid_entry, alloc_size, GFP_ATOMIC); - if (!temp) - return pid_entry; - temp->max_state = max_state; - memset(temp->time_in_state + pid_entry->max_state, 0, - (max_state - pid_entry->max_state) * - sizeof(pid_entry->time_in_state[0])); - if (temp != pid_entry) { - hlist_replace_rcu(&pid_entry->hash, &temp->hash); - kfree_rcu(pid_entry, rcu); - } - return temp; - } - - pid_entry = kzalloc(alloc_size, GFP_ATOMIC); - if (!pid_entry) - return NULL; - times = kzalloc(sizeof(*times), GFP_ATOMIC); - if (!times) { - kfree(pid_entry); - return NULL; - } - pid_entry->package = kzalloc(MAX_TASK_COMM_LEN, GFP_ATOMIC); - if (!pid_entry->package) { - kfree(pid_entry); - return NULL; - } - - strncpy(pid_entry->package, package, MAX_TASK_COMM_LEN); - pid_entry->hash_code = hash_string(pid_entry->package); - pid_entry->pid = pid; - pid_entry->concurrent_times = times; - - hash_add_rcu(sys_app_hash_table, &pid_entry->hash, hash_code); - - return pid_entry; -} - -static void *pid_seq_start(struct seq_file *seq, loff_t *pos) -{ - if (*pos >= HASH_SIZE(sys_app_hash_table)) - return NULL; - - return &sys_app_hash_table[*pos]; -} - -static void *pid_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - do { - (*pos)++; - - if (*pos >= HASH_SIZE(sys_app_hash_table)) - return NULL; - } while (hlist_empty(&sys_app_hash_table[*pos])); - - 
return &sys_app_hash_table[*pos]; -} - -static void pid_seq_stop(struct seq_file *seq, void *v){ } - -static int sys_app_concurrent_time_seq_show(struct seq_file *m, void *v, - atomic64_t *(*get_times)(struct concurrent_times *)) -{ - struct pid_entry *pid_entry; - int i, num_possible_cpus = num_possible_cpus(); - rcu_read_lock(); - - hlist_for_each_entry_rcu(pid_entry, (struct hlist_head *)v, hash) { - atomic64_t *times = get_times(pid_entry->concurrent_times); - - seq_puts(m, pid_entry->package); - seq_putc(m, ':'); - - for (i = 0; i < num_possible_cpus; ++i) { - u64 time = nsec_to_clock_t(atomic64_read(×[i])); - - seq_put_decimal_ull(m, " ", time); - } - seq_putc(m, '\n'); - } - rcu_read_unlock(); - - return 0; -} - static int single_uid_time_in_state_show(struct seq_file *m, void *ptr) { struct uid_entry *uid_entry; @@ -428,89 +273,6 @@ static inline atomic64_t *get_policy_times(struct concurrent_times *times) return times->policy; } -static int sys_app_time_in_state_seq_show(struct seq_file *m, void *v) -{ - struct pid_entry *pid_entry; - struct cpu_freqs *freqs, *last_freqs = NULL; - int i, cpu; - - if (v == sys_app_hash_table) { - seq_puts(m, "sys_app:"); - for_each_possible_cpu(cpu) { - freqs = all_freqs[cpu]; - if (!freqs || freqs == last_freqs) - continue; - last_freqs = freqs; - for (i = 0; i < freqs->max_state; i++) { - seq_put_decimal_ull(m, " ", - freqs->freq_table[i]); - } - } - seq_putc(m, '\n'); - } - - rcu_read_lock(); - - hlist_for_each_entry_rcu(pid_entry, (struct hlist_head *)v, hash) { - if (pid_entry->max_state) { - seq_puts(m, pid_entry->package); - seq_putc(m, ':'); - } - for (i = 0; i < pid_entry->max_state; ++i) { - u64 time = nsec_to_clock_t(pid_entry->time_in_state[i]); - seq_put_decimal_ull(m, " ", time); - } - if (pid_entry->max_state) - seq_putc(m, '\n'); - } - - rcu_read_unlock(); - return 0; -} - -static int sys_app_concurrent_active_time_seq_show(struct seq_file *m, void *v) -{ - if (v == sys_app_hash_table) { - 
seq_put_decimal_ull(m, "cpus: ", num_possible_cpus()); - seq_putc(m, '\n'); - } - - return sys_app_concurrent_time_seq_show(m, v, get_active_times); -} - -static int sys_app_concurrent_policy_time_seq_show(struct seq_file *m, void *v) -{ - int i; - struct cpu_freqs *freqs, *last_freqs = NULL; - - if (v == sys_app_hash_table) { - int cnt = 0; - - for_each_possible_cpu(i) { - freqs = all_freqs[i]; - if (!freqs) - continue; - if (freqs != last_freqs) { - if (last_freqs) { - seq_put_decimal_ull(m, ": ", cnt); - seq_putc(m, ' '); - cnt = 0; - } - seq_put_decimal_ull(m, "policy", i); - - last_freqs = freqs; - } - cnt++; - } - if (last_freqs) { - seq_put_decimal_ull(m, ": ", cnt); - seq_putc(m, '\n'); - } - } - - return sys_app_concurrent_time_seq_show(m, v, get_policy_times); -} - static int concurrent_policy_time_seq_show(struct seq_file *m, void *v) { int i; @@ -644,10 +406,6 @@ void cpufreq_acct_update_power(struct task_struct *p, u64 cputime) struct cpufreq_policy *policy; uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p)); int cpu = 0; - pid_t pid; - u64 tmp_hash; - struct pid_entry *pid_entry; - const char *package_name; if (!freqs || is_idle_task(p) || p->flags & PF_EXITING) return; @@ -666,17 +424,6 @@ void cpufreq_acct_update_power(struct task_struct *p, u64 cputime) uid_entry->time_in_state[state] += cputime; spin_unlock_irqrestore(&uid_lock, flags); - if (uid == SYSTEM_UID) { - spin_lock_irqsave(&pid_lock, flags); - pid = p->tgid; - package_name = p->group_leader->comm; - tmp_hash = hash_string(package_name); - pid_entry = find_or_register_pid_locked(tmp_hash, package_name, pid); - if (pid_entry && state < pid_entry->max_state) - pid_entry->time_in_state[state] += cputime; - spin_unlock_irqrestore(&pid_lock, flags); - } - rcu_read_lock(); uid_entry = find_uid_entry_rcu(uid); if (!uid_entry) { @@ -711,16 +458,6 @@ void cpufreq_acct_update_power(struct task_struct *p, u64 cputime) atomic64_add(cputime, 
&uid_entry->concurrent_times->policy[policy_first_cpu + policy_cpu_cnt - 1]); - if (uid == SYSTEM_UID) { - pid_entry = find_pid_entry_rcu(tmp_hash); - if (pid_entry) { - atomic64_add(cputime, - &pid_entry->concurrent_times->active[active_cpu_cnt - 1]); - atomic64_add(cputime, - &pid_entry->concurrent_times->policy[policy_first_cpu + - policy_cpu_cnt - 1]); - } - } rcu_read_unlock(); } @@ -879,63 +616,6 @@ static const struct file_operations concurrent_policy_time_fops = { .release = seq_release, }; -static const struct seq_operations sys_app_time_in_state_seq_ops = { - .start = pid_seq_start, - .next = pid_seq_next, - .stop = pid_seq_stop, - .show = sys_app_time_in_state_seq_show, -}; - -static int sys_app_time_in_state_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &sys_app_time_in_state_seq_ops); -} - -static const struct file_operations sys_app_time_in_state_fops = { - .open = sys_app_time_in_state_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct seq_operations sys_app_concurrent_active_time_seq_ops = { - .start = pid_seq_start, - .next = pid_seq_next, - .stop = pid_seq_stop, - .show = sys_app_concurrent_active_time_seq_show, -}; - -static int sys_app_concurrent_active_time_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &sys_app_concurrent_active_time_seq_ops); -} - -static const struct file_operations sys_app_concurrent_active_time_fops = { - .open = sys_app_concurrent_active_time_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct seq_operations sys_app_concurrent_policy_time_seq_ops = { - .start = pid_seq_start, - .next = pid_seq_next, - .stop = pid_seq_stop, - .show = sys_app_concurrent_policy_time_seq_show, -}; - -static int sys_app_concurrent_policy_time_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &sys_app_concurrent_policy_time_seq_ops); -} - -static const struct 
file_operations sys_app_concurrent_policy_time_fops = { - .open = sys_app_concurrent_policy_time_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int __init cpufreq_times_init(void) { proc_create_data("uid_time_in_state", 0444, NULL, @@ -947,15 +627,6 @@ static int __init cpufreq_times_init(void) proc_create_data("uid_concurrent_policy_time", 0444, NULL, &concurrent_policy_time_fops, NULL); - proc_create_data("sys_app_time_in_state", 0444, NULL, - &sys_app_time_in_state_fops, NULL); - - proc_create_data("sys_app_concurrent_active_time", 0444, NULL, - &sys_app_concurrent_active_time_fops, NULL); - - proc_create_data("sys_app_concurrent_policy_time", 0444, NULL, - &sys_app_concurrent_policy_time_fops, NULL); - return 0; } diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index fdb0eade69af..694c2b7e2ccb 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -35,8 +35,6 @@ #include -#define USE_LMH_DEV 0 - /* * Cooling state <-> CPUFreq frequency * @@ -153,39 +151,18 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb, * Similarly, if policy minimum set by the user is less than * the floor_frequency, then adjust the policy->min. 
*/ - if (clipped_freq > cpufreq_cdev->clipped_freq) - clipped_freq = cpufreq_cdev->clipped_freq; + clipped_freq = cpufreq_cdev->clipped_freq; + floor_freq = cpufreq_cdev->floor_freq; + if (policy->max > clipped_freq || policy->min < floor_freq) + cpufreq_verify_within_limits(policy, floor_freq, + clipped_freq); + break; } - cpufreq_verify_within_limits(policy, floor_freq, clipped_freq); mutex_unlock(&cooling_list_lock); return NOTIFY_OK; } -void cpu_limits_set_level(unsigned int cpu, unsigned int max_freq) -{ - struct cpufreq_cooling_device *cpufreq_cdev; - struct thermal_cooling_device *cdev; - unsigned int cdev_cpu; - unsigned int level; - - list_for_each_entry(cpufreq_cdev, &cpufreq_cdev_list, node) { - sscanf(cpufreq_cdev->cdev->type, "thermal-cpufreq-%d", &cdev_cpu); - if (cdev_cpu == cpu) { - for (level = 0; level <= cpufreq_cdev->max_level; level++) { - int target_freq = cpufreq_cdev->em->table[level].frequency; - if (max_freq <= target_freq) { - cdev = cpufreq_cdev->cdev; - if (cdev) - cdev->ops->set_cur_state(cdev, cpufreq_cdev->max_level - level); - break; - } - } - break; - } - } -} - #ifdef CONFIG_ENERGY_MODEL /** * get_level: Find the level for a particular frequency @@ -441,18 +418,12 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev, * can handle the CPU freq mitigation, if not, notify cpufreq * framework. 
*/ - if (USE_LMH_DEV && cpufreq_cdev->plat_ops && - cpufreq_cdev->plat_ops->ceil_limit) { + if (cpufreq_cdev->plat_ops && + cpufreq_cdev->plat_ops->ceil_limit) cpufreq_cdev->plat_ops->ceil_limit(cpufreq_cdev->policy->cpu, clip_freq); - get_online_cpus(); - cpufreq_update_policy(cpufreq_cdev->policy->cpu); - put_online_cpus(); - } else { - get_online_cpus(); + else cpufreq_update_policy(cpufreq_cdev->policy->cpu); - put_online_cpus(); - } return 0; } @@ -728,7 +699,7 @@ __cpufreq_cooling_register(struct device_node *np, list_add(&cpufreq_cdev->node, &cpufreq_cdev_list); mutex_unlock(&cooling_list_lock); - if (first) + if (first && !cpufreq_cdev->plat_ops) cpufreq_register_notifier(&thermal_cpufreq_notifier_block, CPUFREQ_POLICY_NOTIFIER); @@ -868,9 +839,10 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) mutex_unlock(&cooling_list_lock); if (last) { - cpufreq_unregister_notifier( - &thermal_cpufreq_notifier_block, - CPUFREQ_POLICY_NOTIFIER); + if (!cpufreq_cdev->plat_ops) + cpufreq_unregister_notifier( + &thermal_cpufreq_notifier_block, + CPUFREQ_POLICY_NOTIFIER); } thermal_cooling_device_unregister(cpufreq_cdev->cdev); diff --git a/drivers/thermal/qcom/bcl_pmic5.c b/drivers/thermal/qcom/bcl_pmic5.c index b96ad7cdece0..bdef0ddb47d6 100644 --- a/drivers/thermal/qcom/bcl_pmic5.c +++ b/drivers/thermal/qcom/bcl_pmic5.c @@ -24,8 +24,6 @@ #define BCL_DRIVER_NAME "bcl_pmic5" #define BCL_MONITOR_EN 0x46 #define BCL_IRQ_STATUS 0x08 -#define BCL_IADC_BF_DGL_CTL 0x59 -#define BCL_IADC_BF_DGL_16MS 0x0E #define BCL_IBAT_HIGH 0x4B #define BCL_IBAT_TOO_HIGH 0x4C @@ -597,23 +595,9 @@ static void bcl_probe_lvls(struct platform_device *pdev, bcl_lvl_init(pdev, BCL_LVL2, BCL_IRQ_L2, bcl_perph); } -static void bcl_iadc_bf_degl_set(struct bcl_device *bcl_perph, int time) -{ - int ret; - int data = 0; - ret = bcl_read_register(bcl_perph, BCL_IADC_BF_DGL_CTL, &data); - if (ret) - return; - data = (data & 0xF0) | time; - ret = bcl_write_register(bcl_perph, 
BCL_IADC_BF_DGL_CTL, data); - if (ret) - return; -} - static void bcl_configure_bcl_peripheral(struct bcl_device *bcl_perph) { bcl_write_register(bcl_perph, BCL_MONITOR_EN, BIT(7)); - bcl_iadc_bf_degl_set(bcl_perph, BCL_IADC_BF_DGL_16MS); } static int bcl_remove(struct platform_device *pdev) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 101f389dcef9..54d71678a63c 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -22,7 +22,6 @@ #include #include #include -#include #ifdef CONFIG_DRM #include @@ -69,8 +68,13 @@ struct screen_monitor sm; #endif static struct device thermal_message_dev; -static atomic_t switch_mode = ATOMIC_INIT(10); +static atomic_t switch_mode = ATOMIC_INIT(-1); static atomic_t temp_state = ATOMIC_INIT(0); +static atomic_t balance_mode = ATOMIC_INIT(0); +static atomic_t board_sensor_temp_comp_default = ATOMIC_INIT(0); +static atomic_t cpu_nolimit_temp_default = ATOMIC_INIT(0); +static atomic_t wifi_limit = ATOMIC_INIT(0); + static char boost_buf[128]; const char *board_sensor; static char board_sensor_temp[128]; @@ -1728,14 +1732,12 @@ static ssize_t thermal_sconfig_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - int ret, val = -1; + int val = -1; - ret = kstrtoint(buf, 10, &val); + val = simple_strtol(buf, NULL, 10); atomic_set(&switch_mode, val); - if (ret) - return ret; return len; } @@ -1761,6 +1763,29 @@ thermal_boost_store(struct device *dev, static DEVICE_ATTR(boost, 0644, thermal_boost_show, thermal_boost_store); +static ssize_t +thermal_balance_mode_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&balance_mode)); +} + +static ssize_t +thermal_balance_mode_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + int val = -1; + + val = simple_strtol(buf, NULL, 10); + + atomic_set(&balance_mode, val); + + return len; +} + 
+static DEVICE_ATTR(balance_mode, 0664, + thermal_balance_mode_show, thermal_balance_mode_store); + static ssize_t thermal_temp_state_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1772,14 +1797,12 @@ static ssize_t thermal_temp_state_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - int ret, val = -1; + int val = -1; - ret = kstrtoint(buf, 10, &val); + val = simple_strtol(buf, NULL, 10); atomic_set(&temp_state, val); - if (ret) - return ret; return len; } @@ -1805,8 +1828,6 @@ cpu_limits_store(struct device *dev, return -EINVAL; } - cpu_limits_set_level(cpu, max); - return len; } @@ -1846,6 +1867,72 @@ static DEVICE_ATTR(board_sensor_temp, 0664, thermal_board_sensor_temp_show, thermal_board_sensor_temp_store); static ssize_t +thermal_board_sensor_temp_comp_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&board_sensor_temp_comp_default)); +} + +static ssize_t +thermal_board_sensor_temp_comp_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + int val = -1; + + val = simple_strtol(buf, NULL, 10); + + atomic_set(&board_sensor_temp_comp_default, val); + + return len; +} + +static DEVICE_ATTR(board_sensor_temp_comp, 0664, + thermal_board_sensor_temp_comp_show, thermal_board_sensor_temp_comp_store); + +static ssize_t +thermal_wifi_limit_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&wifi_limit)); +} +static ssize_t +thermal_wifi_limit_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + int val = -1; + + val = simple_strtol(buf, NULL, 10); + + atomic_set(&wifi_limit, val); + return len; +} + +static DEVICE_ATTR(wifi_limit, 0664, + thermal_wifi_limit_show, thermal_wifi_limit_store); + +static ssize_t +thermal_cpu_nolimit_temp_show(struct device *dev, + struct 
device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&cpu_nolimit_temp_default)); +} + +static ssize_t +thermal_cpu_nolimit_temp_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + int val = -1; + + val = simple_strtol(buf, NULL, 10); + + atomic_set(&cpu_nolimit_temp_default, val); + + return len; +} + +static DEVICE_ATTR(cpu_nolimit_temp, 0664, + thermal_cpu_nolimit_temp_show, thermal_cpu_nolimit_temp_store); +static ssize_t thermal_ambient_sensor_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1915,6 +2002,22 @@ static int create_thermal_message_node(void) if (ret < 0) pr_warn("Thermal: create board sensor temp node failed\n"); + ret = sysfs_create_file(&thermal_message_dev.kobj, &dev_attr_board_sensor_temp_comp.attr); + if (ret < 0) + pr_warn("Thermal: create board sensor temp comp node failed\n"); + + ret = sysfs_create_file(&thermal_message_dev.kobj, &dev_attr_balance_mode.attr); + if (ret < 0) + pr_warn("Thermal: create balance mode node failed\n"); + + ret = sysfs_create_file(&thermal_message_dev.kobj, &dev_attr_wifi_limit.attr); + if (ret < 0) + pr_warn("Thermal: create wifi limit node failed\n"); + + ret = sysfs_create_file(&thermal_message_dev.kobj, &dev_attr_cpu_nolimit_temp.attr); + if (ret < 0) + pr_warn("Thermal: create cpu nolimit node failed\n"); + ret = sysfs_create_file(&thermal_message_dev.kobj, &dev_attr_ambient_sensor.attr); if (ret < 0) pr_warn("Thermal: create ambient sensor node failed\n"); @@ -2020,23 +2123,6 @@ static int __init thermal_init(void) pr_warn("Thermal: Can not register suspend notifier, return %d\n", result); - result = of_parse_thermal_message(); - if (result) - pr_warn("Thermal: Can not parse thermal message node, return %d\n", - result); - - result = create_thermal_message_node(); - if (result) - pr_warn("Thermal: create thermal message node failed, return %d\n", - result); - -#ifdef CONFIG_DRM - 
sm.thermal_notifier.notifier_call = screen_state_for_thermal_callback; - if (mi_drm_register_client(&sm.thermal_notifier) < 0) { - pr_warn("Thermal: register screen state callback failed\n"); - } -#endif - return 0; exit_zone_parse: @@ -2056,14 +2142,10 @@ static int __init thermal_init(void) static void thermal_exit(void) { -#ifdef CONFIG_DRM - mi_drm_unregister_client(&sm.thermal_notifier); -#endif unregister_pm_notifier(&thermal_pm_nb); of_thermal_destroy_zones(); destroy_workqueue(thermal_passive_wq); genetlink_exit(); - destroy_thermal_message_node(); class_unregister(&thermal_class); thermal_unregister_governors(); ida_destroy(&thermal_tz_ida); From 53d79ae76bf5448170ca2f3ad17b330c1b770369 Mon Sep 17 00:00:00 2001 From: Carlos Ayrton Lopez Arroyo <15030201@itcelaya.edu.mx> Date: Tue, 31 May 2022 22:47:02 -0500 Subject: [PATCH 09/44] Revert "drivers: power: supply: Disable battery capacity learning" This reverts commit ca6a2b20bd8979f93332339038d1481a3dc56b71. --- drivers/power/supply/qcom/qpnp-qg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/qcom/qpnp-qg.c b/drivers/power/supply/qcom/qpnp-qg.c index 6cabf02b117a..9a03758ce0f8 100644 --- a/drivers/power/supply/qcom/qpnp-qg.c +++ b/drivers/power/supply/qcom/qpnp-qg.c @@ -3872,7 +3872,7 @@ static int qg_alg_init(struct qpnp_qg *chip) "qcom,cl-disable"); /*Return if capacity learning is disabled*/ - if (!chip->dt.cl_disable) + if (chip->dt.cl_disable) return 0; cl = devm_kzalloc(chip->dev, sizeof(*cl), GFP_KERNEL); From 5dd01f5a282acee54ddbd8797795434beda5e035 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Fri, 11 Jan 2019 12:49:54 -0800 Subject: [PATCH 10/44] qpnp-smb5: Add support for PROP_MOISTURE_DETECTION_ENABLED To enable: echo 1 > /sys/class/power_supply/usb/moisture_detection_enabled To disable: echo 0 > /sys/class/power_supply/usb/moisture_detection_enabled Bug: 119562608 Bug: 77606903 Change-Id: Iec6a63e0aaa78f829ad25e56479634e6f82c8001 
Signed-off-by: Badhri Jagan Sridharan Signed-off-by: UtsavBalar1231 --- drivers/power/supply/qcom/qpnp-smb5.c | 9 +++ drivers/power/supply/qcom/smb5-lib.c | 92 ++++++++++++++++++++++++--- drivers/power/supply/qcom/smb5-lib.h | 3 + 3 files changed, 96 insertions(+), 8 deletions(-) diff --git a/drivers/power/supply/qcom/qpnp-smb5.c b/drivers/power/supply/qcom/qpnp-smb5.c index ae3b6d24f18a..d3bf1ca1c151 100644 --- a/drivers/power/supply/qcom/qpnp-smb5.c +++ b/drivers/power/supply/qcom/qpnp-smb5.c @@ -1458,6 +1458,7 @@ static enum power_supply_property smb5_usb_props[] = { POWER_SUPPLY_PROP_APDO_MAX, POWER_SUPPLY_PROP_CHARGER_STATUS, POWER_SUPPLY_PROP_INPUT_VOLTAGE_SETTLED, + POWER_SUPPLY_PROP_MOISTURE_DETECTION_ENABLE, }; static int smb5_usb_get_prop(struct power_supply *psy, @@ -1471,6 +1472,9 @@ static int smb5_usb_get_prop(struct power_supply *psy, val->intval = 0; switch (psp) { + case POWER_SUPPLY_PROP_MOISTURE_DETECTION_ENABLE: + val->intval = chg->moisture_detection_enabled ? 1 : 0; + break; case POWER_SUPPLY_PROP_PRESENT: rc = smblib_get_prop_usb_present(chg, val); break; @@ -1697,6 +1701,9 @@ static int smb5_usb_set_prop(struct power_supply *psy, case POWER_SUPPLY_PROP_FAKE_HVDCP3: chg->fake_hvdcp3 = val->intval; break; + case POWER_SUPPLY_PROP_MOISTURE_DETECTION_ENABLE: + rc = smblib_enable_moisture_detection(chg, val->intval == 1); + break; case POWER_SUPPLY_PROP_PD_CURRENT_MAX: rc = smblib_set_prop_pd_current_max(chg, val); break; @@ -1823,6 +1830,7 @@ static int smb5_usb_prop_is_writeable(struct power_supply *psy, case POWER_SUPPLY_PROP_ADAPTER_CC_MODE: case POWER_SUPPLY_PROP_APSD_RERUN: case POWER_SUPPLY_PROP_APDO_MAX: + case POWER_SUPPLY_PROP_MOISTURE_DETECTION_ENABLE: return 1; default: break; @@ -4895,6 +4903,7 @@ static int smb5_probe(struct platform_device *pdev) chg->main_fcc_max = -EINVAL; chg->warm_fake_charging = false; chg->fake_dc_on = false; + chg->moisture_detection_enabled = true; mutex_init(&chg->adc_lock); chg->regmap = 
dev_get_regmap(chg->dev->parent, NULL); diff --git a/drivers/power/supply/qcom/smb5-lib.c b/drivers/power/supply/qcom/smb5-lib.c index 3913ebb07be9..def4fada45a3 100644 --- a/drivers/power/supply/qcom/smb5-lib.c +++ b/drivers/power/supply/qcom/smb5-lib.c @@ -9003,6 +9003,11 @@ enum alarmtimer_restart smblib_lpd_recheck_timer(struct alarm *alarm, struct smb_charger *chg = container_of(alarm, struct smb_charger, lpd_recheck_timer); int rc; + unsigned long flags; + + spin_lock_irqsave(&chg->moisture_detection_enable, flags); + if (!chg->moisture_detection_enabled) + goto disable; if (chg->lpd_reason == LPD_MOISTURE_DETECTED) { pval.intval = POWER_SUPPLY_TYPEC_PR_DUAL; @@ -9010,7 +9015,7 @@ enum alarmtimer_restart smblib_lpd_recheck_timer(struct alarm *alarm, if (rc < 0) { smblib_err(chg, "Couldn't write 0x%02x to TYPE_C_INTRPT_ENB_SOFTWARE_CTRL rc=%d\n", pval.intval, rc); - return ALARMTIMER_NORESTART; + goto exit; } chg->moisture_present = false; power_supply_changed(chg->usb_psy); @@ -9021,16 +9026,61 @@ enum alarmtimer_restart smblib_lpd_recheck_timer(struct alarm *alarm, if (rc < 0) { smblib_err(chg, "Couldn't set TYPE_C_INTERRUPT_EN_CFG_2_REG rc=%d\n", rc); - return ALARMTIMER_NORESTART; + goto exit; } } +disable: chg->lpd_stage = LPD_STAGE_NONE; chg->lpd_reason = LPD_NONE; +exit: + spin_unlock_irqrestore(&chg->moisture_detection_enable, flags); + return ALARMTIMER_NORESTART; } +int smblib_enable_moisture_detection(struct smb_charger *chg, bool enable) +{ + int rc = 0; + unsigned long flags; + + if (enable == chg->moisture_detection_enabled) + return 0; + + cancel_delayed_work_sync(&chg->lpd_ra_open_work); + alarm_cancel(&chg->lpd_recheck_timer); + + spin_lock_irqsave(&chg->moisture_detection_enable, flags); + rc = smblib_masked_write(chg, TYPE_C_INTERRUPT_EN_CFG_2_REG, + TYPEC_WATER_DETECTION_INT_EN_BIT, + enable ? 
+ TYPEC_WATER_DETECTION_INT_EN_BIT : 0); + if (rc < 0) { + smblib_err(chg, "Couldn't set TYPE_C_INTERRUPT_EN_CFG_2_REG rc=%d\n", rc); + goto exit; + } + + chg->moisture_detection_enabled = enable; + + if (!chg->moisture_detection_enabled) { + union power_supply_propval pval; + + pval.intval = POWER_SUPPLY_TYPEC_PR_DUAL; + if (smblib_set_prop_typec_power_role(chg, &pval) < 0) + smblib_err(chg, "Couldn't enable DRP\n"); + + chg->lpd_stage = LPD_STAGE_NONE; + chg->lpd_reason = LPD_NONE; + vote(chg->awake_votable, LPD_VOTER, false, 0); + power_supply_changed(chg->usb_psy); + } + +exit: + spin_unlock_irqrestore(&chg->moisture_detection_enable, flags); + return rc; +} + #define RSBU_K_300K_UV 3000000 static bool smblib_src_lpd(struct smb_charger *chg) { @@ -9038,6 +9088,7 @@ static bool smblib_src_lpd(struct smb_charger *chg) bool lpd_flag = false; u8 stat; int rc; + unsigned long flags; if (chg->lpd_disabled) return false; @@ -9061,6 +9112,10 @@ static bool smblib_src_lpd(struct smb_charger *chg) break; } + spin_lock_irqsave(&chg->moisture_detection_enable, flags); + if (!chg->moisture_detection_enabled) + lpd_flag = false; + if (lpd_flag) { chg->lpd_stage = LPD_STAGE_COMMIT; pval.intval = POWER_SUPPLY_TYPEC_PR_SINK; @@ -9079,6 +9134,7 @@ static bool smblib_src_lpd(struct smb_charger *chg) chg->typec_mode = smblib_get_prop_typec_mode(chg); } + spin_unlock_irqrestore(&chg->moisture_detection_enable, flags); return lpd_flag; } @@ -9578,6 +9634,7 @@ static void smblib_lpd_launch_ra_open_work(struct smb_charger *chg) { u8 stat; int rc; + unsigned long flags; if (chg->lpd_disabled) return; @@ -9593,9 +9650,15 @@ static void smblib_lpd_launch_ra_open_work(struct smb_charger *chg) && chg->lpd_stage == LPD_STAGE_NONE) { chg->lpd_stage = LPD_STAGE_FLOAT; cancel_delayed_work_sync(&chg->lpd_ra_open_work); - vote(chg->awake_votable, LPD_VOTER, true, 0); - schedule_delayed_work(&chg->lpd_ra_open_work, + spin_lock_irqsave(&chg->moisture_detection_enable, flags); + if 
(chg->moisture_detection_enabled) { + vote(chg->awake_votable, LPD_VOTER, true, 0); + schedule_delayed_work(&chg->lpd_ra_open_work, msecs_to_jiffies(300)); + } else { + chg->lpd_stage = LPD_STAGE_NONE; + } + spin_unlock_irqrestore(&chg->moisture_detection_enable, flags); } } @@ -11699,6 +11762,8 @@ static void smblib_lpd_ra_open_work(struct work_struct *work) union power_supply_propval pval; u8 stat; int rc; + unsigned long flags; + bool rsbux_low = false; if (chg->pr_swap_in_progress || chg->pd_hard_reset) { chg->lpd_stage = LPD_STAGE_NONE; @@ -11736,14 +11801,22 @@ static void smblib_lpd_ra_open_work(struct work_struct *work) /* Wait 1.5ms to get SBUx ready */ usleep_range(1500, 1510); - if (smblib_rsbux_low(chg, RSBU_K_300K_UV)) { + rsbux_low = smblib_rsbux_low(chg, RSBU_K_300K_UV); + + spin_lock_irqsave(&chg->moisture_detection_enable, flags); + if (!chg->moisture_detection_enabled) { + chg->lpd_stage = LPD_STAGE_NONE; + goto unlock; + } + + if (rsbux_low) { /* Moisture detected, enable sink only mode */ pval.intval = POWER_SUPPLY_TYPEC_PR_SINK; rc = smblib_set_prop_typec_power_role(chg, &pval); if (rc < 0) { smblib_err(chg, "Couldn't set typec sink only rc=%d\n", rc); - goto out; + goto unlock; } chg->lpd_reason = LPD_MOISTURE_DETECTED; @@ -11757,7 +11830,7 @@ static void smblib_lpd_ra_open_work(struct work_struct *work) if (rc < 0) { smblib_err(chg, "Couldn't set TYPE_C_INTERRUPT_EN_CFG_2_REG rc=%d\n", rc); - goto out; + goto unlock; } /* restore DRP mode */ @@ -11766,7 +11839,7 @@ static void smblib_lpd_ra_open_work(struct work_struct *work) if (rc < 0) { smblib_err(chg, "Couldn't write 0x%02x to TYPE_C_INTRPT_ENB_SOFTWARE_CTRL rc=%d\n", pval.intval, rc); - goto out; + goto unlock; } chg->lpd_reason = LPD_FLOATING_CABLE; @@ -11774,6 +11847,8 @@ static void smblib_lpd_ra_open_work(struct work_struct *work) /* recheck in 60 seconds */ alarm_start_relative(&chg->lpd_recheck_timer, ms_to_ktime(60000)); +unlock: + 
spin_unlock_irqrestore(&chg->moisture_detection_enable, flags); out: vote(chg->awake_votable, LPD_VOTER, false, 0); } @@ -12001,6 +12076,7 @@ int smblib_init(struct smb_charger *chg) mutex_init(&chg->dcin_aicl_lock); mutex_init(&chg->dpdm_lock); spin_lock_init(&chg->typec_pr_lock); + spin_lock_init(&chg->moisture_detection_enable); INIT_WORK(&chg->batt_update_work, batt_update_work); INIT_WORK(&chg->bms_update_work, bms_update_work); INIT_WORK(&chg->pl_update_work, pl_update_work); diff --git a/drivers/power/supply/qcom/smb5-lib.h b/drivers/power/supply/qcom/smb5-lib.h index 2a3d5879c9b3..46c3831120b7 100644 --- a/drivers/power/supply/qcom/smb5-lib.h +++ b/drivers/power/supply/qcom/smb5-lib.h @@ -561,6 +561,7 @@ struct smb_charger { struct mutex ps_change_lock; struct mutex irq_status_lock; struct mutex dcin_aicl_lock; + spinlock_t moisture_detection_enable; spinlock_t typec_pr_lock; struct mutex adc_lock; struct mutex dpdm_lock; @@ -784,6 +785,7 @@ struct smb_charger { int jeita_soft_fcc[2]; int jeita_soft_fv[2]; bool moisture_present; + bool moisture_detection_enabled; bool uusb_moisture_protection_capable; bool uusb_moisture_protection_enabled; bool hw_die_temp_mitigation; @@ -1233,6 +1235,7 @@ int smblib_night_charging_func(struct smb_charger *chg, union power_supply_propval *val); int smblib_get_quick_charge_type(struct smb_charger *chg); int smblib_get_qc3_main_icl_offset(struct smb_charger *chg, int *offset_ua); +int smblib_enable_moisture_detection(struct smb_charger *chg, bool enable); int smblib_dp_dm_bq(struct smb_charger *chg, int val); int smblib_get_prop_battery_charging_enabled(struct smb_charger *chg, From f6627187bf7bbe6921c651e805063146f15eed4f Mon Sep 17 00:00:00 2001 From: yanyh2 Date: Thu, 11 Jun 2020 18:26:03 +0800 Subject: [PATCH 11/44] qpnp-smb5: check lpd_disable before moisture detect As property lpd_disable has high priority to moisture_detection_enabled so force moisture_detection_enabled to false if lpd_disable is set by dts 
configuration. Change-Id: I62937ca757efcd4b1a206bb627d955220a5b028c Signed-off-by: yanyh2 Reviewed-on: https://gerrit.mot.com/1614252 SME-Granted: SME Approvals Granted SLTApproved: Slta Waiver Tested-by: Jira Key Reviewed-by: Huosheng Liao Reviewed-by: Haijian Ma Submit-Approved: Jira Key Reviewed-on: https://gerrit.mot.com/1759885 Reviewed-by: Wei Xu Reviewed-by: Xiangpo Zhao Signed-off-by: UtsavBalar1231 --- drivers/power/supply/qcom/qpnp-smb5.c | 1 + drivers/power/supply/qcom/smb5-lib.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/power/supply/qcom/qpnp-smb5.c b/drivers/power/supply/qcom/qpnp-smb5.c index d3bf1ca1c151..20434ceda426 100644 --- a/drivers/power/supply/qcom/qpnp-smb5.c +++ b/drivers/power/supply/qcom/qpnp-smb5.c @@ -673,6 +673,7 @@ static int smb5_parse_dt_misc(struct smb5 *chip, struct device_node *node) chg->lpd_disabled = chg->lpd_disabled || of_property_read_bool(node, "qcom,lpd-disable"); + chg->moisture_detection_enabled = !chg->lpd_disabled; chg->use_bq_pump = of_property_read_bool(node, "mi,use-bq-pump"); diff --git a/drivers/power/supply/qcom/smb5-lib.c b/drivers/power/supply/qcom/smb5-lib.c index def4fada45a3..0cd22c0e978c 100644 --- a/drivers/power/supply/qcom/smb5-lib.c +++ b/drivers/power/supply/qcom/smb5-lib.c @@ -9045,9 +9045,13 @@ int smblib_enable_moisture_detection(struct smb_charger *chg, bool enable) int rc = 0; unsigned long flags; - if (enable == chg->moisture_detection_enabled) + if (chg->lpd_disabled) return 0; + mutex_lock(&chg->moisture_detection_enable); + if (enable == chg->moisture_detection_enabled) + goto exit; + cancel_delayed_work_sync(&chg->lpd_ra_open_work); alarm_cancel(&chg->lpd_recheck_timer); From 27ca54194409193533f24db79fb6525991b0368a Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Fri, 22 Mar 2019 14:52:24 -0700 Subject: [PATCH 12/44] smb5-lib: Use mutex instead of spin lock for moisture_detection_enable The vote API uses a mutex to guard the votable 
structure. Therefore, use a mutex instead of spinlock for moisture_detection_enable. This should be safe to do so as the irqs are registered as threaded irqs. Bug: 128576960 Change-Id: I309db8429c4f43134b4e469f0159fec079da2097 Signed-off-by: Badhri Jagan Sridharan Signed-off-by: UtsavBalar1231 --- drivers/power/supply/qcom/smb5-lib.c | 83 +++++++++++++++------------- drivers/power/supply/qcom/smb5-lib.h | 6 +- 2 files changed, 49 insertions(+), 40 deletions(-) diff --git a/drivers/power/supply/qcom/smb5-lib.c b/drivers/power/supply/qcom/smb5-lib.c index 0cd22c0e978c..16d725bb222c 100644 --- a/drivers/power/supply/qcom/smb5-lib.c +++ b/drivers/power/supply/qcom/smb5-lib.c @@ -8999,13 +8999,23 @@ irqreturn_t usb_source_change_irq_handler(int irq, void *data) enum alarmtimer_restart smblib_lpd_recheck_timer(struct alarm *alarm, ktime_t time) { - union power_supply_propval pval; struct smb_charger *chg = container_of(alarm, struct smb_charger, lpd_recheck_timer); + + if (queue_work(chg->wq, &chg->lpd_recheck_work)) + pm_stay_awake(chg->dev); + + return ALARMTIMER_NORESTART; +} + +static void lpd_recheck_work(struct work_struct *work) +{ + union power_supply_propval pval; + struct smb_charger *chg = container_of(work, struct smb_charger, + lpd_recheck_work); int rc; - unsigned long flags; - spin_lock_irqsave(&chg->moisture_detection_enable, flags); + mutex_lock(&chg->moisture_detection_enable); if (!chg->moisture_detection_enabled) goto disable; @@ -9035,15 +9045,14 @@ enum alarmtimer_restart smblib_lpd_recheck_timer(struct alarm *alarm, chg->lpd_reason = LPD_NONE; exit: - spin_unlock_irqrestore(&chg->moisture_detection_enable, flags); - - return ALARMTIMER_NORESTART; + mutex_unlock(&chg->moisture_detection_enable); + pm_relax(chg->dev); + return; } int smblib_enable_moisture_detection(struct smb_charger *chg, bool enable) { int rc = 0; - unsigned long flags; if (chg->lpd_disabled) return 0; @@ -9054,8 +9063,8 @@ int smblib_enable_moisture_detection(struct smb_charger 
*chg, bool enable) cancel_delayed_work_sync(&chg->lpd_ra_open_work); alarm_cancel(&chg->lpd_recheck_timer); + mutex_lock(&chg->moisture_detection_enable); - spin_lock_irqsave(&chg->moisture_detection_enable, flags); rc = smblib_masked_write(chg, TYPE_C_INTERRUPT_EN_CFG_2_REG, TYPEC_WATER_DETECTION_INT_EN_BIT, enable ? @@ -9081,7 +9090,7 @@ int smblib_enable_moisture_detection(struct smb_charger *chg, bool enable) } exit: - spin_unlock_irqrestore(&chg->moisture_detection_enable, flags); + mutex_unlock(&chg->moisture_detection_enable); return rc; } @@ -9092,16 +9101,16 @@ static bool smblib_src_lpd(struct smb_charger *chg) bool lpd_flag = false; u8 stat; int rc; - unsigned long flags; - if (chg->lpd_disabled) - return false; + mutex_lock(&chg->moisture_detection_enable); + if (chg->lpd_disabled || !chg->moisture_detection_enabled) + goto exit; rc = smblib_read(chg, TYPE_C_SRC_STATUS_REG, &stat); if (rc < 0) { smblib_err(chg, "Couldn't read TYPE_C_SRC_STATUS_REG rc=%d\n", rc); - return false; + goto exit; } switch (stat & DETECTED_SNK_TYPE_MASK) { @@ -9116,10 +9125,6 @@ static bool smblib_src_lpd(struct smb_charger *chg) break; } - spin_lock_irqsave(&chg->moisture_detection_enable, flags); - if (!chg->moisture_detection_enabled) - lpd_flag = false; - if (lpd_flag) { chg->lpd_stage = LPD_STAGE_COMMIT; pval.intval = POWER_SUPPLY_TYPEC_PR_SINK; @@ -9138,7 +9143,8 @@ static bool smblib_src_lpd(struct smb_charger *chg) chg->typec_mode = smblib_get_prop_typec_mode(chg); } - spin_unlock_irqrestore(&chg->moisture_detection_enable, flags); +exit: + mutex_unlock(&chg->moisture_detection_enable); return lpd_flag; } @@ -9638,7 +9644,6 @@ static void smblib_lpd_launch_ra_open_work(struct smb_charger *chg) { u8 stat; int rc; - unsigned long flags; if (chg->lpd_disabled) return; @@ -9654,7 +9659,7 @@ static void smblib_lpd_launch_ra_open_work(struct smb_charger *chg) && chg->lpd_stage == LPD_STAGE_NONE) { chg->lpd_stage = LPD_STAGE_FLOAT; 
cancel_delayed_work_sync(&chg->lpd_ra_open_work); - spin_lock_irqsave(&chg->moisture_detection_enable, flags); + mutex_lock(&chg->moisture_detection_enable); if (chg->moisture_detection_enabled) { vote(chg->awake_votable, LPD_VOTER, true, 0); schedule_delayed_work(&chg->lpd_ra_open_work, @@ -9662,7 +9667,7 @@ static void smblib_lpd_launch_ra_open_work(struct smb_charger *chg) } else { chg->lpd_stage = LPD_STAGE_NONE; } - spin_unlock_irqrestore(&chg->moisture_detection_enable, flags); + mutex_unlock(&chg->moisture_detection_enable); } } @@ -11766,10 +11771,11 @@ static void smblib_lpd_ra_open_work(struct work_struct *work) union power_supply_propval pval; u8 stat; int rc; - unsigned long flags; - bool rsbux_low = false; - if (chg->pr_swap_in_progress || chg->pd_hard_reset) { + mutex_lock(&chg->moisture_detection_enable); + + if (chg->pr_swap_in_progress || chg->pd_hard_reset + || !chg->moisture_detection_enabled) { chg->lpd_stage = LPD_STAGE_NONE; goto out; } @@ -11805,22 +11811,14 @@ static void smblib_lpd_ra_open_work(struct work_struct *work) /* Wait 1.5ms to get SBUx ready */ usleep_range(1500, 1510); - rsbux_low = smblib_rsbux_low(chg, RSBU_K_300K_UV); - - spin_lock_irqsave(&chg->moisture_detection_enable, flags); - if (!chg->moisture_detection_enabled) { - chg->lpd_stage = LPD_STAGE_NONE; - goto unlock; - } - - if (rsbux_low) { + if (smblib_rsbux_low(chg, RSBU_K_300K_UV)) { /* Moisture detected, enable sink only mode */ pval.intval = POWER_SUPPLY_TYPEC_PR_SINK; rc = smblib_set_prop_typec_power_role(chg, &pval); if (rc < 0) { smblib_err(chg, "Couldn't set typec sink only rc=%d\n", rc); - goto unlock; + goto out; } chg->lpd_reason = LPD_MOISTURE_DETECTED; @@ -11834,7 +11832,7 @@ static void smblib_lpd_ra_open_work(struct work_struct *work) if (rc < 0) { smblib_err(chg, "Couldn't set TYPE_C_INTERRUPT_EN_CFG_2_REG rc=%d\n", rc); - goto unlock; + goto out; } /* restore DRP mode */ @@ -11843,7 +11841,7 @@ static void smblib_lpd_ra_open_work(struct work_struct *work) 
if (rc < 0) { smblib_err(chg, "Couldn't write 0x%02x to TYPE_C_INTRPT_ENB_SOFTWARE_CTRL rc=%d\n", pval.intval, rc); - goto unlock; + goto out; } chg->lpd_reason = LPD_FLOATING_CABLE; @@ -11851,9 +11849,8 @@ static void smblib_lpd_ra_open_work(struct work_struct *work) /* recheck in 60 seconds */ alarm_start_relative(&chg->lpd_recheck_timer, ms_to_ktime(60000)); -unlock: - spin_unlock_irqrestore(&chg->moisture_detection_enable, flags); out: + mutex_unlock(&chg->moisture_detection_enable); vote(chg->awake_votable, LPD_VOTER, false, 0); } @@ -12076,16 +12073,17 @@ int smblib_init(struct smb_charger *chg) int rc = 0; mutex_init(&chg->smb_lock); + mutex_init(&chg->moisture_detection_enable); mutex_init(&chg->irq_status_lock); mutex_init(&chg->dcin_aicl_lock); mutex_init(&chg->dpdm_lock); spin_lock_init(&chg->typec_pr_lock); - spin_lock_init(&chg->moisture_detection_enable); INIT_WORK(&chg->batt_update_work, batt_update_work); INIT_WORK(&chg->bms_update_work, bms_update_work); INIT_WORK(&chg->pl_update_work, pl_update_work); INIT_WORK(&chg->jeita_update_work, jeita_update_work); INIT_WORK(&chg->dcin_aicl_work, dcin_aicl_work); + INIT_WORK(&chg->lpd_recheck_work, lpd_recheck_work); INIT_WORK(&chg->cp_status_change_work, smblib_cp_status_change_work); INIT_WORK(&chg->batt_verify_update_work, smblib_batt_verify_update_work); INIT_WORK(&chg->plugin_check_time_work, smblib_plugin_check_time_work); @@ -12268,6 +12266,12 @@ int smblib_init(struct smb_charger *chg) return -EINVAL; } + chg->wq = create_singlethread_workqueue(dev_name(chg->dev)); + if (!chg->wq) { + smblib_err(chg, "workqueue creation failed\n"); + return -ENODEV; + } + return rc; } @@ -12331,6 +12335,7 @@ int smblib_deinit(struct smb_charger *chg) } smblib_iio_deinit(chg); + destroy_workqueue(chg->wq); return 0; } diff --git a/drivers/power/supply/qcom/smb5-lib.h b/drivers/power/supply/qcom/smb5-lib.h index 46c3831120b7..f79a768edb9e 100644 --- a/drivers/power/supply/qcom/smb5-lib.h +++ 
b/drivers/power/supply/qcom/smb5-lib.h @@ -560,8 +560,8 @@ struct smb_charger { struct mutex smb_lock; struct mutex ps_change_lock; struct mutex irq_status_lock; + struct mutex moisture_detection_enable; struct mutex dcin_aicl_lock; - spinlock_t moisture_detection_enable; spinlock_t typec_pr_lock; struct mutex adc_lock; struct mutex dpdm_lock; @@ -953,6 +953,10 @@ struct smb_charger { bool flag_second_ffc_term_current; int night_chg_flag; + + /* lpd timer work */ + struct workqueue_struct *wq; + struct work_struct lpd_recheck_work; }; int smblib_read(struct smb_charger *chg, u16 addr, u8 *val); From e7c3e1f349fbcdc2f11196f08b125ca1c5f1091a Mon Sep 17 00:00:00 2001 From: yanyh2 Date: Tue, 16 Jun 2020 14:55:41 +0800 Subject: [PATCH 13/44] smb5-lib: clear moisture_detected and icl vote When user select "Enable USB" option to force disabling moisture detection, the moisture_detected flag and moisture protection voter for USB ICL should be cleared, otherwise they will hold the value until next source detach event. This patch will fix the following issues: 1. Moisture_detected does not clear when moisture_detection_enabled is clear. 2. Moisture USB ICL voter stop charGing after moisture is cleaned for the first time when a charger attach. 
Change-Id: I7c89861e98f9d5e73ca612a5b4546cfcce4d5a18 Signed-off-by: yanyh2 Reviewed-on: https://gerrit.mot.com/1623576 SLTApproved: Slta Waiver SME-Granted: SME Approvals Granted Tested-by: Jira Key Reviewed-by: Huosheng Liao Reviewed-by: Haijian Ma Submit-Approved: Jira Key Reviewed-on: https://gerrit.mot.com/1759887 Reviewed-by: Wei Xu Reviewed-by: Xiangpo Zhao Signed-off-by: UtsavBalar1231 --- drivers/power/supply/qcom/smb5-lib.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/power/supply/qcom/smb5-lib.c b/drivers/power/supply/qcom/smb5-lib.c index 16d725bb222c..41273785492b 100644 --- a/drivers/power/supply/qcom/smb5-lib.c +++ b/drivers/power/supply/qcom/smb5-lib.c @@ -9086,6 +9086,8 @@ int smblib_enable_moisture_detection(struct smb_charger *chg, bool enable) chg->lpd_stage = LPD_STAGE_NONE; chg->lpd_reason = LPD_NONE; vote(chg->awake_votable, LPD_VOTER, false, 0); + chg->moisture_present = false; + vote(chg->usb_icl_votable, LPD_VOTER, false, 0); power_supply_changed(chg->usb_psy); } @@ -9750,6 +9752,8 @@ static void smblib_lpd_clear_ra_open_work(struct smb_charger *chg) chg->lpd_stage = LPD_STAGE_FLOAT_CANCEL; cancel_delayed_work_sync(&chg->lpd_ra_open_work); vote(chg->awake_votable, LPD_VOTER, false, 0); + chg->moisture_present = false; + vote(chg->usb_icl_votable, LPD_VOTER, false, 0); } #define TYPEC_DETACH_DETECT_DELAY_MS 2000 From 724f12f3c2ef22231627b2516d642396b3ee1516 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 12 Dec 2018 11:22:21 -0800 Subject: [PATCH 14/44] ANDROID: GKI: power_supply: Add PROP_MOISTURE_DETECTION_ENABLED Add a power supply prop to enable/disable moisture detection. 
Bug: 148999666 pick from android opensource: https://android.googlesource.com/kernel/msm/+log/refs/tags/android-10.0.0_r0.67/drivers/power/supply/qcom Change-Id: Ie5b3b721a7f8513990de798288fbf4dea361e5ed Signed-off-by: Badhri Jagan Sridharan (cherry picked from commit 84afb7d015a12d7933e1729856e5091166c2ab7c) Signed-off-by: rickyniu (cherry picked from commit 50d8261cd912d185c7602bb7f8960b4d4c8e7362) Signed-off-by: SaRavana Kannan Signed-off-by: UtsavBalar1231 --- drivers/power/supply/power_supply_sysfs.c | 1 + include/linux/power_supply.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index 5fcd6cc4fc69..8cf00b8ad2cc 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -666,6 +666,7 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(otg_state), POWER_SUPPLY_ATTR(fg_type), POWER_SUPPLY_ATTR(charger_status), + POWER_SUPPLY_ATTR(moisture_detection_enabled), /* Local extensions of type int64_t */ POWER_SUPPLY_ATTR(charge_counter_ext), /* Properties of type `const char *' */ diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index c198f0c94cc9..c13ac42b69b3 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -466,6 +466,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_OTG_STATE, POWER_SUPPLY_PROP_FG_TYPE, POWER_SUPPLY_PROP_CHARGER_STATUS, + POWER_SUPPLY_PROP_MOISTURE_DETECTION_ENABLE, /* Local extensions of type int64_t */ POWER_SUPPLY_PROP_CHARGE_COUNTER_EXT, /* Properties of type `const char *' */ From 1186ced09cc81d7683772121c07328abf6654675 Mon Sep 17 00:00:00 2001 From: UtsavBalar1231 Date: Tue, 13 Jul 2021 11:36:27 +0530 Subject: [PATCH 15/44] msm-poweroff: Store restart reason in panic * In case of panic we can rely on warm reset to save the power up reason in memory which was previosly getting lost because of cold reset. 
* This change will also allows us to persist panic logs for debugging purposes. Test: Force a cold reboot using command "adb reboot bootloader" Change-Id: I21a4c0147a17eae5b997091c5e973252a55bca47 Signed-off-by: UtsavBalar1231 --- drivers/power/reset/msm-poweroff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/reset/msm-poweroff.c b/drivers/power/reset/msm-poweroff.c index 579f9c3d26b6..bc2485179411 100644 --- a/drivers/power/reset/msm-poweroff.c +++ b/drivers/power/reset/msm-poweroff.c @@ -511,7 +511,7 @@ static void msm_restart_prepare(const char *cmd) pr_info("Forcing a warm reset of the system\n"); /* Hard reset the PMIC unless memory contents must be maintained. */ - if (force_warm_reboot || need_warm_reset) + if (force_warm_reboot || need_warm_reset || in_panic) qpnp_pon_system_pwr_off(PON_POWER_OFF_WARM_RESET); else qpnp_pon_system_pwr_off(PON_POWER_OFF_HARD_RESET); From 63bb3c4b890e0c59226694a577bfc9a235ae3650 Mon Sep 17 00:00:00 2001 From: Lei Chen Date: Fri, 30 Apr 2021 15:11:17 +0800 Subject: [PATCH 16/44] LPD: notify the usb hal to acquire the moisture status Sometimes, there is no UI notification when liquid presence detection. Send uevent message to usb hal once the liquid is detected, then the moisture status will be transfered to the usb hal and framework. 
Change-Id: Ic9d07cbb81037a64b216cb6df14bc4e0ef4dabda Signed-off-by: Lei Chen Reviewed-on: https://gerrit.mot.com/1937372 SME-Granted: SME Approvals Granted SLTApproved: Slta Waiver Submit-Approved: Lu Lu Tested-by: Jira Key Tested-by: Lu Lu Reviewed-by: Wei Xu Reviewed-by: Xiangpo Zhao Signed-off-by: UtsavBalar1231 --- drivers/power/supply/qcom/smb5-lib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/supply/qcom/smb5-lib.c b/drivers/power/supply/qcom/smb5-lib.c index 41273785492b..e5638443e8f4 100644 --- a/drivers/power/supply/qcom/smb5-lib.c +++ b/drivers/power/supply/qcom/smb5-lib.c @@ -11828,6 +11828,7 @@ static void smblib_lpd_ra_open_work(struct work_struct *work) chg->lpd_reason = LPD_MOISTURE_DETECTED; chg->moisture_present = true; vote(chg->usb_icl_votable, LPD_VOTER, true, 0); + power_supply_changed(chg->usb_psy); } else { /* Floating cable, disable water detection irq temporarily */ From b441a3fa9e05fab8a2b82262569b9e55f0fdc54e Mon Sep 17 00:00:00 2001 From: Carlos Ayrton Lopez Arroyo <15030201@itcelaya.edu.mx> Date: Tue, 31 May 2022 23:34:41 -0500 Subject: [PATCH 17/44] zram: revert rice commits This reverts commit 285570fa2b478083356bbcdc84761981b01faf52. This reverts commit f63e3d54e4ce6ca3a6def38d5d88dbf73a635316. This reverts commit 7a50dbe2c1b73c154382dba7db4744a6ea1b0c72. This reverts commit fa607a1a4ccc52cc221fd4415adb7d7c83557240. This reverts commit 3c12dc9536ed209fb0d26bb91ef174a405236574. This reverts commit 6546f4262488b7df9c27892c534996e4125512c6. This reverts commit 88e23fae9f14a8ed1b2f237092bfa0cd0787da5c. This reverts commit 20d2ddedbcdcb83f6d2029a6825028171617ed3a. This reverts commit ab18f3c727338f7e3fa84af79462f99d880c078f. This reverts commit b1ffb9cb457dac8b5b44947a08b3c343b113795e. 
--- arch/x86/mm/pat_rbtree.c | 19 +++++++-- drivers/block/drbd/drbd_interval.c | 29 +++++++++++-- drivers/block/zram/Kconfig | 2 +- drivers/block/zram/zcomp.c | 52 +++++++++++++---------- drivers/block/zram/zcomp.h | 2 +- drivers/block/zram/zram_drv.c | 57 +++++++++++--------------- drivers/block/zram/zram_drv.h | 1 + include/linux/interval_tree_generic.h | 22 +++++++++- include/linux/rbtree_augmented.h | 34 --------------- lib/rbtree_test.c | 37 +++++++++-------- mm/mmap.c | 29 +++++-------- tools/include/linux/rbtree_augmented.h | 34 --------------- 12 files changed, 152 insertions(+), 166 deletions(-) diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c index 65ebe4b88f7c..fa16036fa592 100644 --- a/arch/x86/mm/pat_rbtree.c +++ b/arch/x86/mm/pat_rbtree.c @@ -54,10 +54,23 @@ static u64 get_subtree_max_end(struct rb_node *node) return ret; } -#define NODE_END(node) ((node)->end) +static u64 compute_subtree_max_end(struct memtype *data) +{ + u64 max_end = data->end, child_max_end; + + child_max_end = get_subtree_max_end(data->rb.rb_right); + if (child_max_end > max_end) + max_end = child_max_end; + + child_max_end = get_subtree_max_end(data->rb.rb_left); + if (child_max_end > max_end) + max_end = child_max_end; + + return max_end; +} -RB_DECLARE_CALLBACKS_MAX(static, memtype_rb_augment_cb, - struct memtype, rb, u64, subtree_max_end, NODE_END) +RB_DECLARE_CALLBACKS(static, memtype_rb_augment_cb, struct memtype, rb, + u64, subtree_max_end, compute_subtree_max_end) /* Find the first (lowest start addr) overlapping range from rb tree */ static struct memtype *memtype_rb_lowest_match(struct rb_root *root, diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c index 651bd0236a99..c58986556161 100644 --- a/drivers/block/drbd/drbd_interval.c +++ b/drivers/block/drbd/drbd_interval.c @@ -13,10 +13,33 @@ sector_t interval_end(struct rb_node *node) return this->end; } -#define NODE_END(node) ((node)->sector + ((node)->size >> 9)) +/** + 
* compute_subtree_last - compute end of @node + * + * The end of an interval is the highest (start + (size >> 9)) value of this + * node and of its children. Called for @node and its parents whenever the end + * may have changed. + */ +static inline sector_t +compute_subtree_last(struct drbd_interval *node) +{ + sector_t max = node->sector + (node->size >> 9); + + if (node->rb.rb_left) { + sector_t left = interval_end(node->rb.rb_left); + if (left > max) + max = left; + } + if (node->rb.rb_right) { + sector_t right = interval_end(node->rb.rb_right); + if (right > max) + max = right; + } + return max; +} -RB_DECLARE_CALLBACKS_MAX(static, augment_callbacks, - struct drbd_interval, rb, sector_t, end, NODE_END); +RB_DECLARE_CALLBACKS(static, augment_callbacks, struct drbd_interval, rb, + sector_t, end, compute_subtree_last); /** * drbd_insert_interval - insert a new interval into a tree diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index bf64a66816a2..c8aab3115733 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -2,7 +2,7 @@ config ZRAM tristate "Compressed RAM block device support" depends on BLOCK && SYSFS && ZSMALLOC && CRYPTO - select CRYPTO_LZ4 + select CRYPTO_LZO default n help Creates virtual block devices called /dev/zramX (X = 0, 1, ...). 
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 071a4e7b461f..4ed0a78fdc09 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -19,9 +19,9 @@ #include "zcomp.h" static const char * const backends[] = { - "lz4", -#if IS_ENABLED(CONFIG_CRYPTO_LZO) "lzo", +#if IS_ENABLED(CONFIG_CRYPTO_LZ4) + "lz4", #endif #if IS_ENABLED(CONFIG_CRYPTO_LZ4HC) "lz4hc", @@ -32,6 +32,7 @@ static const char * const backends[] = { #if IS_ENABLED(CONFIG_CRYPTO_ZSTD) "zstd", #endif + NULL }; static void zcomp_strm_free(struct zcomp_strm *zstrm) @@ -39,16 +40,19 @@ static void zcomp_strm_free(struct zcomp_strm *zstrm) if (!IS_ERR_OR_NULL(zstrm->tfm)) crypto_free_comp(zstrm->tfm); free_pages((unsigned long)zstrm->buffer, 1); - zstrm->tfm = NULL; - zstrm->buffer = NULL; + kfree(zstrm); } /* - * Initialize zcomp_strm structure with ->tfm initialized by backend, and - * ->buffer. Return a negative value on error. + * allocate new zcomp_strm structure with ->tfm initialized by + * backend, return NULL on error */ -static int zcomp_strm_init(struct zcomp_strm *zstrm, struct zcomp *comp) +static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp) { + struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_KERNEL); + if (!zstrm) + return NULL; + zstrm->tfm = crypto_alloc_comp(comp->name, 0, 0); /* * allocate 2 pages. 
1 for compressed data, plus 1 extra for the @@ -57,16 +61,16 @@ static int zcomp_strm_init(struct zcomp_strm *zstrm, struct zcomp *comp) zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); if (IS_ERR_OR_NULL(zstrm->tfm) || !zstrm->buffer) { zcomp_strm_free(zstrm); - return -ENOMEM; + zstrm = NULL; } - return 0; + return zstrm; } bool zcomp_available_algorithm(const char *comp) { int i; - i = sysfs_match_string(backends, comp); + i = __sysfs_match_string(backends, -1, comp); if (i >= 0) return true; @@ -85,9 +89,9 @@ ssize_t zcomp_available_show(const char *comp, char *buf) { bool known_algorithm = false; ssize_t sz = 0; - int i; + int i = 0; - for (i = 0; i < ARRAY_SIZE(backends); i++) { + for (; backends[i]; i++) { if (!strcmp(comp, backends[i])) { known_algorithm = true; sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, @@ -112,7 +116,7 @@ ssize_t zcomp_available_show(const char *comp, char *buf) struct zcomp_strm *zcomp_stream_get(struct zcomp *comp) { - return get_cpu_ptr(comp->stream); + return *get_cpu_ptr(comp->stream); } void zcomp_stream_put(struct zcomp *comp) @@ -158,13 +162,17 @@ int zcomp_cpu_up_prepare(unsigned int cpu, struct hlist_node *node) { struct zcomp *comp = hlist_entry(node, struct zcomp, node); struct zcomp_strm *zstrm; - int ret; - zstrm = per_cpu_ptr(comp->stream, cpu); - ret = zcomp_strm_init(zstrm, comp); - if (ret) + if (WARN_ON(*per_cpu_ptr(comp->stream, cpu))) + return 0; + + zstrm = zcomp_strm_alloc(comp); + if (IS_ERR_OR_NULL(zstrm)) { pr_err("Can't allocate a compression stream\n"); - return ret; + return -ENOMEM; + } + *per_cpu_ptr(comp->stream, cpu) = zstrm; + return 0; } int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node) @@ -172,8 +180,10 @@ int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node) struct zcomp *comp = hlist_entry(node, struct zcomp, node); struct zcomp_strm *zstrm; - zstrm = per_cpu_ptr(comp->stream, cpu); - zcomp_strm_free(zstrm); + zstrm = *per_cpu_ptr(comp->stream, cpu); + if 
(!IS_ERR_OR_NULL(zstrm)) + zcomp_strm_free(zstrm); + *per_cpu_ptr(comp->stream, cpu) = NULL; return 0; } @@ -181,7 +191,7 @@ static int zcomp_init(struct zcomp *comp) { int ret; - comp->stream = alloc_percpu(struct zcomp_strm); + comp->stream = alloc_percpu(struct zcomp_strm *); if (!comp->stream) return -ENOMEM; diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index 9e94095ce000..41c1002a7d7d 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -18,7 +18,7 @@ struct zcomp_strm { /* dynamic per-device compression frontend */ struct zcomp { - struct zcomp_strm __percpu *stream; + struct zcomp_strm * __percpu *stream; const char *name; struct hlist_node node; }; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 097176fbd1b6..f39da19f0f3f 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -41,7 +41,7 @@ static DEFINE_IDR(zram_index_idr); static DEFINE_MUTEX(zram_index_mutex); static int zram_major; -static const char *default_compressor = "lz4"; +static const char *default_compressor = "lzo"; /* Module params (documentation at end) */ static unsigned int num_devices = 1; @@ -51,8 +51,6 @@ static unsigned int num_devices = 1; */ static size_t huge_class_size; -static struct zram *zram0; - static void zram_free_page(struct zram *zram, size_t index); static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio); @@ -210,17 +208,14 @@ static inline void zram_fill_page(void *ptr, unsigned long len, static bool page_same_filled(void *ptr, unsigned long *element) { + unsigned int pos; unsigned long *page; unsigned long val; - unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1; page = (unsigned long *)ptr; val = page[0]; - if (val != page[last_pos]) - return false; - - for (pos = 1; pos < last_pos; pos++) { + for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) { if (val != page[pos]) return false; } @@ -412,10 +407,13 
@@ static void reset_bdev(struct zram *zram) return; bdev = zram->bdev; + if (zram->old_block_size) + set_blocksize(bdev, zram->old_block_size); blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); /* hope filp_close flush all of IO */ filp_close(zram->backing_dev, NULL); zram->backing_dev = NULL; + zram->old_block_size = 0; zram->bdev = NULL; zram->disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; @@ -461,7 +459,7 @@ static ssize_t backing_dev_store(struct device *dev, struct file *backing_dev = NULL; struct inode *inode; struct address_space *mapping; - unsigned int bitmap_sz; + unsigned int bitmap_sz, old_block_size = 0; unsigned long nr_pages, *bitmap = NULL; struct block_device *bdev = NULL; int err; @@ -515,8 +513,14 @@ static ssize_t backing_dev_store(struct device *dev, goto out; } + old_block_size = block_size(bdev); + err = set_blocksize(bdev, PAGE_SIZE); + if (err) + goto out; + reset_bdev(zram); + zram->old_block_size = old_block_size; zram->bdev = bdev; zram->backing_dev = backing_dev; zram->bitmap = bitmap; @@ -540,7 +544,8 @@ static ssize_t backing_dev_store(struct device *dev, return len; out: - kvfree(bitmap); + if (bitmap) + kvfree(bitmap); if (bdev) blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); @@ -1061,25 +1066,20 @@ static ssize_t use_dedup_store(struct device *dev, } #endif -void zram_compact(void) +static ssize_t compact_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) { - if (!zram0) - return; + struct zram *zram = dev_to_zram(dev); - down_read(&zram0->init_lock); - if (!init_done(zram0)) { - up_read(&zram0->init_lock); - return; + down_read(&zram->init_lock); + if (!init_done(zram)) { + up_read(&zram->init_lock); + return -EINVAL; } - zs_compact(zram0->mem_pool); - up_read(&zram0->init_lock); -} + zs_compact(zram->mem_pool); + up_read(&zram->init_lock); -static ssize_t compact_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - 
zram_compact(); return len; } @@ -1382,7 +1382,7 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, zram_slot_unlock(zram, index); /* Should NEVER happen. Return bio error if it does. */ - if (WARN_ON(ret)) + if (unlikely(ret)) pr_err("Decompression failed! err=%d, page=%u\n", ret, index); return ret; @@ -2022,11 +2022,6 @@ static int zram_add(void) goto out_free_dev; device_id = ret; - if (device_id >= 1) { - ret = -ENOMEM; - goto out_free_idr; - } - init_rwsem(&zram->init_lock); #ifdef CONFIG_ZRAM_WRITEBACK spin_lock_init(&zram->wb_limit_lock); @@ -2096,7 +2091,6 @@ static int zram_add(void) strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); zram_debugfs_register(zram); - zram0 = zram; pr_info("Added device: %s\n", zram->disk->disk_name); return device_id; @@ -2138,7 +2132,6 @@ static int zram_remove(struct zram *zram) del_gendisk(zram->disk); blk_cleanup_queue(zram->disk->queue); put_disk(zram->disk); - zram0 = NULL; kfree(zram); return 0; } diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index bdb2ec95931c..22f8366d7971 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -141,6 +141,7 @@ struct zram { bool wb_limit_enable; u64 bd_wb_limit; struct block_device *bdev; + unsigned int old_block_size; unsigned long *bitmap; unsigned long nr_pages; #endif diff --git a/include/linux/interval_tree_generic.h b/include/linux/interval_tree_generic.h index 205218a941e1..1f97ce26cccc 100644 --- a/include/linux/interval_tree_generic.h +++ b/include/linux/interval_tree_generic.h @@ -42,8 +42,26 @@ \ /* Callbacks for augmented rbtree insert and remove */ \ \ -RB_DECLARE_CALLBACKS_MAX(static, ITPREFIX ## _augment, \ - ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, ITLAST) \ +static inline ITTYPE ITPREFIX ## _compute_subtree_last(ITSTRUCT *node) \ +{ \ + ITTYPE max = ITLAST(node), subtree_last; \ + if (node->ITRB.rb_left) { \ + subtree_last = rb_entry(node->ITRB.rb_left, \ + ITSTRUCT, 
ITRB)->ITSUBTREE; \ + if (max < subtree_last) \ + max = subtree_last; \ + } \ + if (node->ITRB.rb_right) { \ + subtree_last = rb_entry(node->ITRB.rb_right, \ + ITSTRUCT, ITRB)->ITSUBTREE; \ + if (max < subtree_last) \ + max = subtree_last; \ + } \ + return max; \ +} \ + \ +RB_DECLARE_CALLBACKS(static, ITPREFIX ## _augment, ITSTRUCT, ITRB, \ + ITTYPE, ITSUBTREE, ITPREFIX ## _compute_subtree_last) \ \ /* Insert / remove interval nodes from the tree */ \ \ diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index ba8d7e7b9480..af8a61be2d8d 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -107,40 +107,6 @@ rbstatic const struct rb_augment_callbacks rbname = { \ .rotate = rbname ## _rotate \ }; -/* - * Template for declaring augmented rbtree callbacks, - * computing RBAUGMENTED scalar as max(RBCOMPUTE(node)) for all subtree nodes. - * - * RBSTATIC: 'static' or empty - * RBNAME: name of the rb_augment_callbacks structure - * RBSTRUCT: struct type of the tree nodes - * RBFIELD: name of struct rb_node field within RBSTRUCT - * RBTYPE: type of the RBAUGMENTED field - * RBAUGMENTED: name of RBTYPE field within RBSTRUCT holding data for subtree - * RBCOMPUTE: name of function that returns the per-node RBTYPE scalar - */ - -#define RB_DECLARE_CALLBACKS_MAX(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD, \ - RBTYPE, RBAUGMENTED, RBCOMPUTE) \ -static inline RBTYPE RBNAME ## _compute_max(RBSTRUCT *node) \ -{ \ - RBSTRUCT *child; \ - RBTYPE max = RBCOMPUTE(node); \ - if (node->RBFIELD.rb_left) { \ - child = rb_entry(node->RBFIELD.rb_left, RBSTRUCT, RBFIELD); \ - if (child->RBAUGMENTED > max) \ - max = child->RBAUGMENTED; \ - } \ - if (node->RBFIELD.rb_right) { \ - child = rb_entry(node->RBFIELD.rb_right, RBSTRUCT, RBFIELD); \ - if (child->RBAUGMENTED > max) \ - max = child->RBAUGMENTED; \ - } \ - return max; \ -} \ -RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD, \ - RBTYPE, RBAUGMENTED, RBNAME ## _compute_max) - 
#define RB_RED 0 #define RB_BLACK 1 diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 6e7a53d06038..b7055b2a07d3 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -76,10 +76,26 @@ static inline void erase_cached(struct test_node *node, struct rb_root_cached *r } -#define NODE_VAL(node) ((node)->val) +static inline u32 augment_recompute(struct test_node *node) +{ + u32 max = node->val, child_augmented; + if (node->rb.rb_left) { + child_augmented = rb_entry(node->rb.rb_left, struct test_node, + rb)->augmented; + if (max < child_augmented) + max = child_augmented; + } + if (node->rb.rb_right) { + child_augmented = rb_entry(node->rb.rb_right, struct test_node, + rb)->augmented; + if (max < child_augmented) + max = child_augmented; + } + return max; +} -RB_DECLARE_CALLBACKS_MAX(static, augment_callbacks, - struct test_node, rb, u32, augmented, NODE_VAL) +RB_DECLARE_CALLBACKS(static, augment_callbacks, struct test_node, rb, + u32, augmented, augment_recompute) static void insert_augmented(struct test_node *node, struct rb_root_cached *root) @@ -221,20 +237,7 @@ static void check_augmented(int nr_nodes) check(nr_nodes); for (rb = rb_first(&root.rb_root); rb; rb = rb_next(rb)) { struct test_node *node = rb_entry(rb, struct test_node, rb); - u32 subtree, max = node->val; - if (node->rb.rb_left) { - subtree = rb_entry(node->rb.rb_left, struct test_node, - rb)->augmented; - if (max < subtree) - max = subtree; - } - if (node->rb.rb_right) { - subtree = rb_entry(node->rb.rb_right, struct test_node, - rb)->augmented; - if (max < subtree) - max = subtree; - } - WARN_ON_ONCE(node->augmented != max); + WARN_ON_ONCE(node->augmented != augment_recompute(node)); } } diff --git a/mm/mmap.c b/mm/mmap.c index 24e1c409300e..0df01b0f7783 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -303,9 +303,9 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) return retval; } -static inline unsigned long vma_compute_gap(struct vm_area_struct *vma) +static long vma_compute_subtree_gap(struct 
vm_area_struct *vma) { - unsigned long gap, prev_end; + unsigned long max, prev_end, subtree_gap; /* * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we @@ -313,21 +313,14 @@ static inline unsigned long vma_compute_gap(struct vm_area_struct *vma) * an unmapped area; whereas when expanding we only require one. * That's a little inconsistent, but keeps the code here simpler. */ - gap = vm_start_gap(vma); + max = vm_start_gap(vma); if (vma->vm_prev) { prev_end = vm_end_gap(vma->vm_prev); - if (gap > prev_end) - gap -= prev_end; + if (max > prev_end) + max -= prev_end; else - gap = 0; + max = 0; } - return gap; -} - -#ifdef CONFIG_DEBUG_VM_RB -static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma) -{ - unsigned long max = vma_compute_gap(vma), subtree_gap; if (vma->vm_rb.rb_left) { subtree_gap = rb_entry(vma->vm_rb.rb_left, struct vm_area_struct, vm_rb)->rb_subtree_gap; @@ -343,6 +336,7 @@ static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma) return max; } +#ifdef CONFIG_DEBUG_VM_RB static int browse_rb(struct mm_struct *mm) { struct rb_root *root = &mm->mm_rb; @@ -456,9 +450,8 @@ static void validate_mm(struct mm_struct *mm) #define mm_rb_write_unlock(mm) do { } while (0) #endif /* CONFIG_SPECULATIVE_PAGE_FAULT */ -RB_DECLARE_CALLBACKS_MAX(static, vma_gap_callbacks, - struct vm_area_struct, vm_rb, - unsigned long, rb_subtree_gap, vma_compute_gap) +RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb, + unsigned long, rb_subtree_gap, vma_compute_subtree_gap) /* * Update augmented rbtree rb_subtree_gap values after vma->vm_start or @@ -468,8 +461,8 @@ RB_DECLARE_CALLBACKS_MAX(static, vma_gap_callbacks, static void vma_gap_update(struct vm_area_struct *vma) { /* - * As it turns out, RB_DECLARE_CALLBACKS_MAX() already created - * a callback function that does exactly what we want. + * As it turns out, RB_DECLARE_CALLBACKS() already created a callback + * function that does exactly what we want. 
*/ vma_gap_callbacks_propagate(&vma->vm_rb, NULL); } diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h index 8296bb48c7b3..43be941db695 100644 --- a/tools/include/linux/rbtree_augmented.h +++ b/tools/include/linux/rbtree_augmented.h @@ -96,40 +96,6 @@ rbstatic const struct rb_augment_callbacks rbname = { \ rbname ## _propagate, rbname ## _copy, rbname ## _rotate \ }; -/* - * Template for declaring augmented rbtree callbacks, - * computing RBAUGMENTED scalar as max(RBCOMPUTE(node)) for all subtree nodes. - * - * RBSTATIC: 'static' or empty - * RBNAME: name of the rb_augment_callbacks structure - * RBSTRUCT: struct type of the tree nodes - * RBFIELD: name of struct rb_node field within RBSTRUCT - * RBTYPE: type of the RBAUGMENTED field - * RBAUGMENTED: name of RBTYPE field within RBSTRUCT holding data for subtree - * RBCOMPUTE: name of function that returns the per-node RBTYPE scalar - */ - -#define RB_DECLARE_CALLBACKS_MAX(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD, \ - RBTYPE, RBAUGMENTED, RBCOMPUTE) \ -static inline RBTYPE RBNAME ## _compute_max(RBSTRUCT *node) \ -{ \ - RBSTRUCT *child; \ - RBTYPE max = RBCOMPUTE(node); \ - if (node->RBFIELD.rb_left) { \ - child = rb_entry(node->RBFIELD.rb_left, RBSTRUCT, RBFIELD); \ - if (child->RBAUGMENTED > max) \ - max = child->RBAUGMENTED; \ - } \ - if (node->RBFIELD.rb_right) { \ - child = rb_entry(node->RBFIELD.rb_right, RBSTRUCT, RBFIELD); \ - if (child->RBAUGMENTED > max) \ - max = child->RBAUGMENTED; \ - } \ - return max; \ -} \ -RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD, \ - RBTYPE, RBAUGMENTED, RBNAME ## _compute_max) - #define RB_RED 0 #define RB_BLACK 1 From 7c9b53da9350acdbc399470e9035dbd5c5d81a35 Mon Sep 17 00:00:00 2001 From: Juhyung Park Date: Sun, 19 Sep 2021 21:21:23 +0900 Subject: [PATCH 18/44] zram: kang from v5.15 Signed-off-by: Juhyung Park Change-Id: I2f2356060285aa9e934f23c17ea6057fba094343 Signed-off-by: UtsavBalar1231 --- Documentation/blockdev/zram.txt 
| 262 +++++++++++++-------- drivers/block/zram/Kconfig | 54 +++-- drivers/block/zram/Makefile | 4 +- drivers/block/zram/zcomp.c | 60 +++-- drivers/block/zram/zcomp.h | 11 +- drivers/block/zram/zram_dedup.c | 255 -------------------- drivers/block/zram/zram_dedup.h | 45 ---- drivers/block/zram/zram_drv.c | 403 +++++++++++--------------------- drivers/block/zram/zram_drv.h | 42 +--- 9 files changed, 373 insertions(+), 763 deletions(-) delete mode 100644 drivers/block/zram/zram_dedup.c delete mode 100644 drivers/block/zram/zram_dedup.h diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt index c6e559e579b6..700329d25f57 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.txt @@ -1,20 +1,24 @@ -zram: Compressed RAM based block devices ----------------------------------------- +======================================== +zram: Compressed RAM-based block devices +======================================== -* Introduction +Introduction +============ -The zram module creates RAM based block devices named /dev/zram +The zram module creates RAM-based block devices named /dev/zram ( = 0, 1, ...). Pages written to these disks are compressed and stored in memory itself. These disks allow very fast I/O and compression provides -good amounts of memory savings. Some of the usecases include /tmp storage, -use as swap disks, various caches under /var and maybe many more :) +good amounts of memory savings. Some of the use cases include /tmp storage, +use as swap disks, various caches under /var and maybe many more. :) Statistics for individual zram devices are exported through sysfs nodes at /sys/block/zram/ -* Usage +Usage +===== There are several ways to configure and manage zram device(-s): + a) using zram and zram_control sysfs attributes b) using zramctl utility, provided by util-linux (util-linux@vger.kernel.org). 
@@ -22,7 +26,7 @@ In this document we will describe only 'manual' zram configuration steps, IOW, zram and zram_control sysfs attributes. In order to get a better idea about zramctl please consult util-linux -documentation, zramctl man-page or `zramctl --help'. Please be informed +documentation, zramctl man-page or `zramctl --help`. Please be informed that zram maintainers do not develop/maintain util-linux or zramctl, should you have any questions please contact util-linux@vger.kernel.org @@ -30,52 +34,67 @@ Following shows a typical sequence of steps for using zram. WARNING ======= + For the sake of simplicity we skip error checking parts in most of the examples below. However, it is your sole responsibility to handle errors. zram sysfs attributes always return negative values in case of errors. The list of possible return codes: --EBUSY -- an attempt to modify an attribute that cannot be changed once -the device has been initialised. Please reset device first; --ENOMEM -- zram was not able to allocate enough memory to fulfil your -needs; --EINVAL -- invalid input has been provided. -If you use 'echo', the returned value that is changed by 'echo' utility, -and, in general case, something like: +======== ============================================================= +-EBUSY an attempt to modify an attribute that cannot be changed once + the device has been initialised. Please reset device first. +-ENOMEM zram was not able to allocate enough memory to fulfil your + needs. +-EINVAL invalid input has been provided. +======== ============================================================= + +If you use 'echo', the returned value is set by the 'echo' utility, +and, in general case, something like:: echo 3 > /sys/block/zram0/max_comp_streams - if [ $? -ne 0 ]; + if [ $? -ne 0 ]; then handle_error fi should suffice. 
-1) Load Module: +1) Load Module +============== + +:: + modprobe zram num_devices=4 - This creates 4 devices: /dev/zram{0,1,2,3} + +This creates 4 devices: /dev/zram{0,1,2,3} num_devices parameter is optional and tells zram how many devices should be pre-created. Default: 1. 2) Set max number of compression streams -Regardless the value passed to this attribute, ZRAM will always -allocate multiple compression streams - one per online CPUs - thus +======================================== + +Regardless of the value passed to this attribute, ZRAM will always +allocate multiple compression streams - one per online CPU - thus allowing several concurrent compression operations. The number of allocated compression streams goes down when some of the CPUs become offline. There is no single-compression-stream mode anymore, -unless you are running a UP system or has only 1 CPU online. +unless you are running a UP system or have only 1 CPU online. + +To find out how many streams are currently available:: -To find out how many streams are currently available: cat /sys/block/zram0/max_comp_streams 3) Select compression algorithm +=============================== + Using comp_algorithm device attribute one can see available and currently selected (shown in square brackets) compression algorithms, -change selected compression algorithm (once the device is initialised +or change the selected compression algorithm (once the device is initialised there is no way to change compression algorithm). -Examples: +Examples:: + #show supported compression algorithms cat /sys/block/zram0/comp_algorithm lzo [lz4] @@ -83,20 +102,23 @@ Examples: #select lzo compression algorithm echo lzo > /sys/block/zram0/comp_algorithm -For the time being, the `comp_algorithm' content does not necessarily +For the time being, the `comp_algorithm` content does not necessarily show every compression algorithm supported by the kernel. 
We keep this list primarily to simplify device configuration and one can configure a new device with a compression algorithm that is not listed in -`comp_algorithm'. The thing is that, internally, ZRAM uses Crypto API +`comp_algorithm`. The thing is that, internally, ZRAM uses Crypto API and, if some of the algorithms were built as modules, it's impossible to list all of them using, for instance, /proc/crypto or any other method. This, however, has an advantage of permitting the usage of custom crypto compression modules (implementing S/W or H/W compression). 4) Set Disksize +=============== + Set disk size by writing the value to sysfs node 'disksize'. The value can be either in bytes or you can use mem suffixes. -Examples: +Examples:: + # Initialize /dev/zram0 with 50MB disksize echo $((50*1024*1024)) > /sys/block/zram0/disksize @@ -111,10 +133,13 @@ since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the size of the disk when not in use so a huge zram is wasteful. 5) Set memory limit: Optional +============================= + Set memory limit by writing the value to sysfs node 'mem_limit'. The value can be either in bytes or you can use mem suffixes. In addition, you could change the value in runtime. -Examples: +Examples:: + # limit /dev/zram0 with 50MB memory echo $((50*1024*1024)) > /sys/block/zram0/mem_limit @@ -126,7 +151,11 @@ Examples: # To disable memory limit echo 0 > /sys/block/zram0/mem_limit -6) Activate: +6) Activate +=========== + +:: + mkswap /dev/zram0 swapon /dev/zram0 @@ -134,52 +163,60 @@ Examples: mount /dev/zram1 /tmp 7) Add/remove zram devices +========================== zram provides a control interface, which enables dynamic (on-demand) device addition and removal. -In order to add a new /dev/zramX device, perform read operation on hot_add -attribute. This will return either new device's device id (meaning that you -can use /dev/zram) or error code. 
+In order to add a new /dev/zramX device, perform a read operation on the hot_add +attribute. This will return either the new device's device id (meaning that you +can use /dev/zram) or an error code. + +Example:: -Example: cat /sys/class/zram-control/hot_add 1 To remove the existing /dev/zramX device (where X is a device id) -execute +execute:: + echo X > /sys/class/zram-control/hot_remove -8) Stats: +8) Stats +======== + Per-device statistics are exported as various nodes under /sys/block/zram/ -A brief description of exported device attributes. For more details please -read Documentation/ABI/testing/sysfs-block-zram. +A brief description of exported device attributes follows. For more details +please read Documentation/ABI/testing/sysfs-block-zram. +====================== ====== =============================================== Name access description ----- ------ ----------- +====================== ====== =============================================== disksize RW show and set the device's disk size initstate RO shows the initialization state of the device reset WO trigger device reset -mem_used_max WO reset the `mem_used_max' counter (see later) -mem_limit WO specifies the maximum amount of memory ZRAM can use - to store the compressed data -writeback_limit WO specifies the maximum amount of write IO zram can - write out to backing device as 4KB unit +mem_used_max WO reset the `mem_used_max` counter (see later) +mem_limit WO specifies the maximum amount of memory ZRAM can + use to store the compressed data +writeback_limit WO specifies the maximum amount of write IO zram + can write out to backing device as 4KB unit writeback_limit_enable RW show and set writeback_limit feature -max_comp_streams RW the number of possible concurrent compress operations +max_comp_streams RW the number of possible concurrent compress + operations comp_algorithm RW show and change the compression algorithm compact WO trigger memory compaction debug_stat RO this file is used for zram 
debugging purposes backing_dev RW set up backend storage for zram to write out idle WO mark allocated slot as idle -use_dedup RW show and set deduplication feature +====================== ====== =============================================== + User space is advised to use the following files to read the device statistics. File /sys/block/zram/stat -Represents block layer statistics. Read Documentation/block/stat.txt for +Represents block layer statistics. Read Documentation/block/stat.rst for details. File /sys/block/zram/io_stat @@ -188,26 +225,32 @@ The stat file represents device's I/O statistics not accounted by block layer and, thus, not available in zram/stat file. It consists of a single line of text and contains the following stats separated by whitespace: - failed_reads the number of failed reads - failed_writes the number of failed writes - invalid_io the number of non-page-size-aligned I/O requests + + ============= ============================================================= + failed_reads The number of failed reads + failed_writes The number of failed writes + invalid_io The number of non-page-size-aligned I/O requests notify_free Depending on device usage scenario it may account + a) the number of pages freed because of swap slot free - notifications or b) the number of pages freed because of - REQ_DISCARD requests sent by bio. The former ones are - sent to a swap block device when a swap slot is freed, - which implies that this disk is being used as a swap disk. + notifications + b) the number of pages freed because of + REQ_OP_DISCARD requests sent by bio. The former ones are + sent to a swap block device when a swap slot is freed, + which implies that this disk is being used as a swap disk. + The latter ones are sent by filesystem mounted with discard option, whenever some data blocks are getting discarded. 
+ ============= ============================================================= File /sys/block/zram/mm_stat -The stat file represents device's mm statistics. It consists of a single +The mm_stat file represents the device's mm statistics. It consists of a single line of text and contains the following stats separated by whitespace: + + ================ ============================================================= orig_data_size uncompressed size of data stored in this disk. - This excludes same-element-filled pages (same_pages) since - no memory is allocated for them. Unit: bytes compr_data_size compressed size of data stored in this disk mem_used_total the amount of memory allocated for this disk. This @@ -217,88 +260,105 @@ line of text and contains the following stats separated by whitespace: Unit: bytes mem_limit the maximum amount of memory ZRAM can use to store the compressed data - mem_used_max the maximum amount of memory zram have consumed to + mem_used_max the maximum amount of memory zram has consumed to store the data same_pages the number of same element filled pages written to this disk. No memory is allocated for such pages. pages_compacted the number of pages freed during compaction huge_pages the number of incompressible pages - dup_data_size deduplicated data size - meta_data_size the amount of metadata allocated for deduplication feature + huge_pages_since the number of incompressible pages since zram set up + ================ ============================================================= File /sys/block/zram/bd_stat -The stat file represents device's backing device statistics. It consists of +The bd_stat file represents a device's backing device statistics. It consists of a single line of text and contains the following stats separated by whitespace: + + ============== ============================================================= bd_count size of data written in backing device. 
Unit: 4K bytes bd_reads the number of reads from backing device Unit: 4K bytes bd_writes the number of writes to backing device Unit: 4K bytes + ============== ============================================================= + +9) Deactivate +============= + +:: -9) Deactivate: swapoff /dev/zram0 umount /dev/zram1 -10) Reset: - Write any positive value to 'reset' sysfs node - echo 1 > /sys/block/zram0/reset - echo 1 > /sys/block/zram1/reset +10) Reset +========= + + Write any positive value to 'reset' sysfs node:: + + echo 1 > /sys/block/zram0/reset + echo 1 > /sys/block/zram1/reset This frees all the memory allocated for the given device and resets the disksize to zero. You must set the disksize again before reusing the device. -* Optional Feature +Optional Feature +================ -= writeback +writeback +--------- With CONFIG_ZRAM_WRITEBACK, zram can write idle/incompressible page to backing storage rather than keeping it in memory. -To use the feature, admin should set up backing device via +To use the feature, admin should set up backing device via:: - "echo /dev/sda5 > /sys/block/zramX/backing_dev" + echo /dev/sda5 > /sys/block/zramX/backing_dev before disksize setting. It supports only partition at this moment. -If admin want to use incompressible page writeback, they could do via +If admin wants to use incompressible page writeback, they could do via:: - "echo huge > /sys/block/zramX/write" + echo huge > /sys/block/zramX/writeback To use idle page writeback, first, user need to declare zram pages -as idle. +as idle:: - "echo all > /sys/block/zramX/idle" + echo all > /sys/block/zramX/idle From now on, any pages on zram are idle pages. The idle mark -will be removed until someone request access of the block. +will be removed until someone requests access of the block. IOW, unless there is access request, those pages are still idle pages. 
-Admin can request writeback of those idle pages at right timing via +Admin can request writeback of those idle pages at right timing via:: - "echo idle > /sys/block/zramX/writeback" + echo idle > /sys/block/zramX/writeback With the command, zram writeback idle pages from memory to the storage. +If admin want to write a specific page in zram device to backing device, +they could write a page index into the interface. + + echo "page_index=1251" > /sys/block/zramX/writeback + If there are lots of write IO with flash device, potentially, it has flash wearout problem so that admin needs to design write limitation to guarantee storage health for entire product life. To overcome the concern, zram supports "writeback_limit" feature. The "writeback_limit_enable"'s default value is 0 so that it doesn't limit -any writeback. IOW, if admin want to apply writeback budget, he should -enable writeback_limit_enable via +any writeback. IOW, if admin wants to apply writeback budget, he should +enable writeback_limit_enable via:: $ echo 1 > /sys/block/zramX/writeback_limit_enable Once writeback_limit_enable is set, zram doesn't allow any writeback -until admin set the budget via /sys/block/zramX/writeback_limit. +until admin sets the budget via /sys/block/zramX/writeback_limit. (If admin doesn't enable writeback_limit_enable, writeback_limit's value -assigned via /sys/block/zramX/writeback_limit is meaninless.) +assigned via /sys/block/zramX/writeback_limit is meaningless.) If admin want to limit writeback as per-day 400M, he could do it -like below. +like below:: $ MB_SHIFT=20 $ 4K_SHIFT=12 @@ -306,47 +366,57 @@ like below. /sys/block/zram0/writeback_limit. 
$ echo 1 > /sys/block/zram0/writeback_limit_enable -If admin want to allow further write again once the bugdet is exausted, -he could do it like below +If admins want to allow further write again once the budget is exhausted, +he could do it like below:: $ echo $((400<>4K_SHIFT)) > \ /sys/block/zram0/writeback_limit -If admin want to see remaining writeback budget since he set, +If admin wants to see remaining writeback budget since last set:: $ cat /sys/block/zramX/writeback_limit -If admin want to disable writeback limit, he could do +If admin want to disable writeback limit, he could do:: $ echo 0 > /sys/block/zramX/writeback_limit_enable -The writeback_limit count will reset whenever you reset zram(e.g., +The writeback_limit count will reset whenever you reset zram (e.g., system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of writeback happened until you reset the zram to allocate extra writeback budget in next setting is user's job. -If admin want to measure writeback count in a certain period, he could +If admin wants to measure writeback count in a certain period, he could know it via /sys/block/zram0/bd_stat's 3rd column. -= memory tracking +memory tracking +=============== With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the zram block. It could be useful to catch cold or incompressible pages of the process with*pagemap. + If you enable the feature, you could see block state via -/sys/kernel/debug/zram/zram0/block_state". The output is as follows, +/sys/kernel/debug/zram/zram0/block_state". The output is as follows:: 300 75.033841 .wh. 301 63.806904 s... 302 63.806919 ..hi -First column is zram's block index. -Second column is access time since the system was booted -Third column is state of the block. -(s: same page -w: written page to backing store -h: huge page -i: idle page) +First column + zram's block index. 
+Second column + access time since the system was booted +Third column + state of the block: + + s: + same page + w: + written page to backing store + h: + huge page + i: + idle page First line of above example says 300th block is accessed at 75.033841sec and the block's state is huge so it is written back to the backing diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index c8aab3115733..4a5c255ca6e4 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -2,8 +2,7 @@ config ZRAM tristate "Compressed RAM block device support" depends on BLOCK && SYSFS && ZSMALLOC && CRYPTO - select CRYPTO_LZO - default n + depends on CRYPTO_LZO || CRYPTO_ZSTD || CRYPTO_LZ4 || CRYPTO_LZ4HC || CRYPTO_842 help Creates virtual block devices called /dev/zramX (X = 0, 1, ...). Pages written to these disks are compressed and stored in memory @@ -15,24 +14,49 @@ config ZRAM See Documentation/blockdev/zram.txt for more information. -config ZRAM_DEDUP - bool "Deduplication support for ZRAM data" +choice + prompt "Default zram compressor" + default ZRAM_DEF_COMP_LZORLE depends on ZRAM - default n - help - Deduplicate ZRAM data to reduce amount of memory consumption. - Advantage largely depends on the workload. In some cases, this - option reduces memory usage to the half. However, if there is no - duplicated data, the amount of memory consumption would be - increased due to additional metadata usage. And, there is - computation time trade-off. Please check the benefit before - enabling this option. Experiment shows the positive effect when - the zram is used as blockdev and is used to store build output. 
+ +config ZRAM_DEF_COMP_LZORLE + bool "lzo-rle" + depends on CRYPTO_LZO + +config ZRAM_DEF_COMP_ZSTD + bool "zstd" + depends on CRYPTO_ZSTD + +config ZRAM_DEF_COMP_LZ4 + bool "lz4" + depends on CRYPTO_LZ4 + +config ZRAM_DEF_COMP_LZO + bool "lzo" + depends on CRYPTO_LZO + +config ZRAM_DEF_COMP_LZ4HC + bool "lz4hc" + depends on CRYPTO_LZ4HC + +config ZRAM_DEF_COMP_842 + bool "842" + depends on CRYPTO_842 + +endchoice + +config ZRAM_DEF_COMP + string + default "lzo-rle" if ZRAM_DEF_COMP_LZORLE + default "zstd" if ZRAM_DEF_COMP_ZSTD + default "lz4" if ZRAM_DEF_COMP_LZ4 + default "lzo" if ZRAM_DEF_COMP_LZO + default "lz4hc" if ZRAM_DEF_COMP_LZ4HC + default "842" if ZRAM_DEF_COMP_842 config ZRAM_WRITEBACK bool "Write back incompressible or idle page to backing device" depends on ZRAM - default n help With incompressible page, there is no memory saving to keep it in memory. Instead, write it out to backing device. diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile index d7204ef6ee53..de9e457907b1 100644 --- a/drivers/block/zram/Makefile +++ b/drivers/block/zram/Makefile @@ -1,4 +1,4 @@ -zram-y := zcomp.o zram_drv.o -zram-$(CONFIG_ZRAM_DEDUP) += zram_dedup.o +# SPDX-License-Identifier: GPL-2.0-only +zram-y := zcomp.o zram_drv.o obj-$(CONFIG_ZRAM) += zram.o diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 4ed0a78fdc09..052aa3f65514 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -1,10 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2014 Sergey Senozhatsky. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
*/ #include @@ -19,7 +15,10 @@ #include "zcomp.h" static const char * const backends[] = { +#if IS_ENABLED(CONFIG_CRYPTO_LZO) "lzo", + "lzo-rle", +#endif #if IS_ENABLED(CONFIG_CRYPTO_LZ4) "lz4", #endif @@ -32,7 +31,6 @@ static const char * const backends[] = { #if IS_ENABLED(CONFIG_CRYPTO_ZSTD) "zstd", #endif - NULL }; static void zcomp_strm_free(struct zcomp_strm *zstrm) @@ -40,19 +38,16 @@ static void zcomp_strm_free(struct zcomp_strm *zstrm) if (!IS_ERR_OR_NULL(zstrm->tfm)) crypto_free_comp(zstrm->tfm); free_pages((unsigned long)zstrm->buffer, 1); - kfree(zstrm); + zstrm->tfm = NULL; + zstrm->buffer = NULL; } /* - * allocate new zcomp_strm structure with ->tfm initialized by - * backend, return NULL on error + * Initialize zcomp_strm structure with ->tfm initialized by backend, and + * ->buffer. Return a negative value on error. */ -static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp) +static int zcomp_strm_init(struct zcomp_strm *zstrm, struct zcomp *comp) { - struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_KERNEL); - if (!zstrm) - return NULL; - zstrm->tfm = crypto_alloc_comp(comp->name, 0, 0); /* * allocate 2 pages. 
1 for compressed data, plus 1 extra for the @@ -61,16 +56,16 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp) zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); if (IS_ERR_OR_NULL(zstrm->tfm) || !zstrm->buffer) { zcomp_strm_free(zstrm); - zstrm = NULL; + return -ENOMEM; } - return zstrm; + return 0; } bool zcomp_available_algorithm(const char *comp) { int i; - i = __sysfs_match_string(backends, -1, comp); + i = sysfs_match_string(backends, comp); if (i >= 0) return true; @@ -89,9 +84,9 @@ ssize_t zcomp_available_show(const char *comp, char *buf) { bool known_algorithm = false; ssize_t sz = 0; - int i = 0; + int i; - for (; backends[i]; i++) { + for (i = 0; i < ARRAY_SIZE(backends); i++) { if (!strcmp(comp, backends[i])) { known_algorithm = true; sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, @@ -116,12 +111,13 @@ ssize_t zcomp_available_show(const char *comp, char *buf) struct zcomp_strm *zcomp_stream_get(struct zcomp *comp) { - return *get_cpu_ptr(comp->stream); + local_lock(&comp->stream->lock); + return this_cpu_ptr(comp->stream); } void zcomp_stream_put(struct zcomp *comp) { - put_cpu_ptr(comp->stream); + local_unlock(&comp->stream->lock); } int zcomp_compress(struct zcomp_strm *zstrm, @@ -162,17 +158,15 @@ int zcomp_cpu_up_prepare(unsigned int cpu, struct hlist_node *node) { struct zcomp *comp = hlist_entry(node, struct zcomp, node); struct zcomp_strm *zstrm; + int ret; - if (WARN_ON(*per_cpu_ptr(comp->stream, cpu))) - return 0; + zstrm = per_cpu_ptr(comp->stream, cpu); + local_lock_init(&zstrm->lock); - zstrm = zcomp_strm_alloc(comp); - if (IS_ERR_OR_NULL(zstrm)) { + ret = zcomp_strm_init(zstrm, comp); + if (ret) pr_err("Can't allocate a compression stream\n"); - return -ENOMEM; - } - *per_cpu_ptr(comp->stream, cpu) = zstrm; - return 0; + return ret; } int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node) @@ -180,10 +174,8 @@ int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node) struct zcomp *comp = 
hlist_entry(node, struct zcomp, node); struct zcomp_strm *zstrm; - zstrm = *per_cpu_ptr(comp->stream, cpu); - if (!IS_ERR_OR_NULL(zstrm)) - zcomp_strm_free(zstrm); - *per_cpu_ptr(comp->stream, cpu) = NULL; + zstrm = per_cpu_ptr(comp->stream, cpu); + zcomp_strm_free(zstrm); return 0; } @@ -191,7 +183,7 @@ static int zcomp_init(struct zcomp *comp) { int ret; - comp->stream = alloc_percpu(struct zcomp_strm *); + comp->stream = alloc_percpu(struct zcomp_strm); if (!comp->stream) return -ENOMEM; diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index 41c1002a7d7d..40f6420f4b2e 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -1,16 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2014 Sergey Senozhatsky. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #ifndef _ZCOMP_H_ #define _ZCOMP_H_ +#include struct zcomp_strm { + /* The members ->buffer and ->tfm are protected by ->lock. */ + local_lock_t lock; /* compression/decompression buffer */ void *buffer; struct crypto_comp *tfm; @@ -18,7 +17,7 @@ struct zcomp_strm { /* dynamic per-device compression frontend */ struct zcomp { - struct zcomp_strm * __percpu *stream; + struct zcomp_strm __percpu *stream; const char *name; struct hlist_node node; }; diff --git a/drivers/block/zram/zram_dedup.c b/drivers/block/zram/zram_dedup.c deleted file mode 100644 index e441289fff81..000000000000 --- a/drivers/block/zram/zram_dedup.c +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Copyright (C) 2017 Joonsoo Kim. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include - -#include "zram_drv.h" - -/* One slot will contain 128 pages theoretically */ -#define ZRAM_HASH_SHIFT 7 -#define ZRAM_HASH_SIZE_MIN (1 << 10) -#define ZRAM_HASH_SIZE_MAX (1 << 31) - -u64 zram_dedup_dup_size(struct zram *zram) -{ - return (u64)atomic64_read(&zram->stats.dup_data_size); -} - -u64 zram_dedup_meta_size(struct zram *zram) -{ - return (u64)atomic64_read(&zram->stats.meta_data_size); -} - -static u32 zram_dedup_checksum(unsigned char *mem) -{ - return jhash(mem, PAGE_SIZE, 0); -} - -void zram_dedup_insert(struct zram *zram, struct zram_entry *new, - u32 checksum) -{ - struct zram_hash *hash; - struct rb_root *rb_root; - struct rb_node **rb_node, *parent = NULL; - struct zram_entry *entry; - - if (!zram_dedup_enabled(zram)) - return; - - new->checksum = checksum; - hash = &zram->hash[checksum % zram->hash_size]; - rb_root = &hash->rb_root; - - spin_lock(&hash->lock); - rb_node = &rb_root->rb_node; - while (*rb_node) { - parent = *rb_node; - entry = rb_entry(parent, struct zram_entry, rb_node); - if (checksum < entry->checksum) - rb_node = &parent->rb_left; - else if (checksum > entry->checksum) - rb_node = &parent->rb_right; - else - rb_node = &parent->rb_left; - } - - rb_link_node(&new->rb_node, parent, rb_node); - rb_insert_color(&new->rb_node, rb_root); - spin_unlock(&hash->lock); -} - -static bool zram_dedup_match(struct zram *zram, struct zram_entry *entry, - unsigned char *mem) -{ - bool match = false; - unsigned char *cmem; - struct zcomp_strm *zstrm; - - cmem = zs_map_object(zram->mem_pool, entry->handle, ZS_MM_RO); - if (entry->len == PAGE_SIZE) { - match = !memcmp(mem, cmem, PAGE_SIZE); - } else { - zstrm = zcomp_stream_get(zram->comp); - if 
(!zcomp_decompress(zstrm, cmem, entry->len, zstrm->buffer)) - match = !memcmp(mem, zstrm->buffer, PAGE_SIZE); - zcomp_stream_put(zram->comp); - } - zs_unmap_object(zram->mem_pool, entry->handle); - - return match; -} - -static unsigned long zram_dedup_put(struct zram *zram, - struct zram_entry *entry) -{ - struct zram_hash *hash; - u32 checksum; - unsigned long val; - - checksum = entry->checksum; - hash = &zram->hash[checksum % zram->hash_size]; - - spin_lock(&hash->lock); - - val = --entry->refcount; - if (!entry->refcount) - rb_erase(&entry->rb_node, &hash->rb_root); - else - atomic64_sub(entry->len, &zram->stats.dup_data_size); - - spin_unlock(&hash->lock); - - return val; -} - -static struct zram_entry *__zram_dedup_get(struct zram *zram, - struct zram_hash *hash, unsigned char *mem, - struct zram_entry *entry) -{ - struct zram_entry *tmp, *prev = NULL; - struct rb_node *rb_node; - - /* find left-most entry with same checksum */ - while ((rb_node = rb_prev(&entry->rb_node))) { - tmp = rb_entry(rb_node, struct zram_entry, rb_node); - if (tmp->checksum != entry->checksum) - break; - - entry = tmp; - } - -again: - entry->refcount++; - atomic64_add(entry->len, &zram->stats.dup_data_size); - spin_unlock(&hash->lock); - - if (prev) - zram_entry_free(zram, prev); - - if (zram_dedup_match(zram, entry, mem)) - return entry; - - spin_lock(&hash->lock); - tmp = NULL; - rb_node = rb_next(&entry->rb_node); - if (rb_node) - tmp = rb_entry(rb_node, struct zram_entry, rb_node); - - if (tmp && (tmp->checksum == entry->checksum)) { - prev = entry; - entry = tmp; - goto again; - } - - spin_unlock(&hash->lock); - zram_entry_free(zram, entry); - - return NULL; -} - -static struct zram_entry *zram_dedup_get(struct zram *zram, - unsigned char *mem, u32 checksum) -{ - struct zram_hash *hash; - struct zram_entry *entry; - struct rb_node *rb_node; - - hash = &zram->hash[checksum % zram->hash_size]; - - spin_lock(&hash->lock); - rb_node = hash->rb_root.rb_node; - while (rb_node) { - 
entry = rb_entry(rb_node, struct zram_entry, rb_node); - if (checksum == entry->checksum) - return __zram_dedup_get(zram, hash, mem, entry); - - if (checksum < entry->checksum) - rb_node = rb_node->rb_left; - else - rb_node = rb_node->rb_right; - } - spin_unlock(&hash->lock); - - return NULL; -} - -struct zram_entry *zram_dedup_find(struct zram *zram, struct page *page, - u32 *checksum) -{ - void *mem; - struct zram_entry *entry; - - if (!zram_dedup_enabled(zram)) - return NULL; - - mem = kmap_atomic(page); - *checksum = zram_dedup_checksum(mem); - - entry = zram_dedup_get(zram, mem, *checksum); - kunmap_atomic(mem); - - return entry; -} - -void zram_dedup_init_entry(struct zram *zram, struct zram_entry *entry, - unsigned long handle, unsigned int len) -{ - if (!zram_dedup_enabled(zram)) - return; - - entry->handle = handle; - entry->refcount = 1; - entry->len = len; -} - -bool zram_dedup_put_entry(struct zram *zram, struct zram_entry *entry) -{ - if (!zram_dedup_enabled(zram)) - return true; - - if (zram_dedup_put(zram, entry)) - return false; - - return true; -} - -int zram_dedup_init(struct zram *zram, size_t num_pages) -{ - int i; - struct zram_hash *hash; - - if (!zram_dedup_enabled(zram)) - return 0; - - zram->hash_size = num_pages >> ZRAM_HASH_SHIFT; - zram->hash_size = min_t(size_t, ZRAM_HASH_SIZE_MAX, zram->hash_size); - zram->hash_size = max_t(size_t, ZRAM_HASH_SIZE_MIN, zram->hash_size); - zram->hash = vzalloc(zram->hash_size * sizeof(struct zram_hash)); - if (!zram->hash) { - pr_err("Error allocating zram entry hash\n"); - return -ENOMEM; - } - - for (i = 0; i < zram->hash_size; i++) { - hash = &zram->hash[i]; - spin_lock_init(&hash->lock); - hash->rb_root = RB_ROOT; - } - - return 0; -} - -void zram_dedup_fini(struct zram *zram) -{ - vfree(zram->hash); - zram->hash = NULL; - zram->hash_size = 0; -} diff --git a/drivers/block/zram/zram_dedup.h b/drivers/block/zram/zram_dedup.h deleted file mode 100644 index 8ab267b0b956..000000000000 --- 
a/drivers/block/zram/zram_dedup.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef _ZRAM_DEDUP_H_ -#define _ZRAM_DEDUP_H_ - -struct zram; -struct zram_entry; - -#ifdef CONFIG_ZRAM_DEDUP - -u64 zram_dedup_dup_size(struct zram *zram); -u64 zram_dedup_meta_size(struct zram *zram); - -void zram_dedup_insert(struct zram *zram, struct zram_entry *new, - u32 checksum); -struct zram_entry *zram_dedup_find(struct zram *zram, struct page *page, - u32 *checksum); - -void zram_dedup_init_entry(struct zram *zram, struct zram_entry *entry, - unsigned long handle, unsigned int len); -bool zram_dedup_put_entry(struct zram *zram, struct zram_entry *entry); - -int zram_dedup_init(struct zram *zram, size_t num_pages); -void zram_dedup_fini(struct zram *zram); -#else - -static inline u64 zram_dedup_dup_size(struct zram *zram) { return 0; } -static inline u64 zram_dedup_meta_size(struct zram *zram) { return 0; } - -static inline void zram_dedup_insert(struct zram *zram, struct zram_entry *new, - u32 checksum) { } -static inline struct zram_entry *zram_dedup_find(struct zram *zram, - struct page *page, u32 *checksum) { return NULL; } - -static inline void zram_dedup_init_entry(struct zram *zram, - struct zram_entry *entry, unsigned long handle, - unsigned int len) { } -static inline bool zram_dedup_put_entry(struct zram *zram, - struct zram_entry *entry) { return true; } - -static inline int zram_dedup_init(struct zram *zram, - size_t num_pages) { return 0; } -static inline void zram_dedup_fini(struct zram *zram) { } - -#endif - -#endif /* _ZRAM_DEDUP_H_ */ diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f39da19f0f3f..2f3b1af107ee 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "zram_drv.h" @@ -41,7 +42,7 @@ static DEFINE_IDR(zram_index_idr); static DEFINE_MUTEX(zram_index_mutex); static int zram_major; -static const char *default_compressor = "lzo"; +static const char 
*default_compressor = CONFIG_ZRAM_DEF_COMP; /* Module params (documentation at end) */ static unsigned int num_devices = 1; @@ -51,6 +52,9 @@ static unsigned int num_devices = 1; */ static size_t huge_class_size; +static const struct block_device_operations zram_devops; +static const struct block_device_operations zram_wb_devops; + static void zram_free_page(struct zram *zram, size_t index); static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio); @@ -81,15 +85,14 @@ static inline struct zram *dev_to_zram(struct device *dev) return (struct zram *)dev_to_disk(dev)->private_data; } -static struct zram_entry *zram_get_entry(struct zram *zram, u32 index) +static unsigned long zram_get_handle(struct zram *zram, u32 index) { - return zram->table[index].entry; + return zram->table[index].handle; } -static void zram_set_entry(struct zram *zram, u32 index, - struct zram_entry *entry) +static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) { - zram->table[index].entry = entry; + zram->table[index].handle = handle; } /* flag operations require table entry bit_spin_lock() being held */ @@ -208,14 +211,17 @@ static inline void zram_fill_page(void *ptr, unsigned long len, static bool page_same_filled(void *ptr, unsigned long *element) { - unsigned int pos; unsigned long *page; unsigned long val; + unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1; page = (unsigned long *)ptr; val = page[0]; - for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) { + if (val != page[last_pos]) + return false; + + for (pos = 1; pos < last_pos; pos++) { if (val != page[pos]) return false; } @@ -291,18 +297,8 @@ static ssize_t idle_store(struct device *dev, struct zram *zram = dev_to_zram(dev); unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; int index; - char mode_buf[8]; - ssize_t sz; - - sz = strscpy(mode_buf, buf, sizeof(mode_buf)); - if (sz <= 0) - return -EINVAL; - - /* ignore trailing new line */ - if 
(mode_buf[sz - 1] == '\n') - mode_buf[sz - 1] = 0x00; - if (strcmp(mode_buf, "all")) + if (!sysfs_streq(buf, "all")) return -EINVAL; down_read(&zram->init_lock); @@ -407,16 +403,12 @@ static void reset_bdev(struct zram *zram) return; bdev = zram->bdev; - if (zram->old_block_size) - set_blocksize(bdev, zram->old_block_size); blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); /* hope filp_close flush all of IO */ filp_close(zram->backing_dev, NULL); zram->backing_dev = NULL; - zram->old_block_size = 0; zram->bdev = NULL; - zram->disk->queue->backing_dev_info->capabilities |= - BDI_CAP_SYNCHRONOUS_IO; + zram->disk->fops = &zram_devops; kvfree(zram->bitmap); zram->bitmap = NULL; } @@ -459,7 +451,7 @@ static ssize_t backing_dev_store(struct device *dev, struct file *backing_dev = NULL; struct inode *inode; struct address_space *mapping; - unsigned int bitmap_sz, old_block_size = 0; + unsigned int bitmap_sz; unsigned long nr_pages, *bitmap = NULL; struct block_device *bdev = NULL; int err; @@ -498,9 +490,10 @@ static ssize_t backing_dev_store(struct device *dev, goto out; } - bdev = bdgrab(I_BDEV(inode)); - err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram); - if (err < 0) { + bdev = blkdev_get_by_dev(inode->i_rdev, + FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram); + if (IS_ERR(bdev)) { + err = PTR_ERR(bdev); bdev = NULL; goto out; } @@ -513,14 +506,8 @@ static ssize_t backing_dev_store(struct device *dev, goto out; } - old_block_size = block_size(bdev); - err = set_blocksize(bdev, PAGE_SIZE); - if (err) - goto out; - reset_bdev(zram); - zram->old_block_size = old_block_size; zram->bdev = bdev; zram->backing_dev = backing_dev; zram->bitmap = bitmap; @@ -535,8 +522,7 @@ static ssize_t backing_dev_store(struct device *dev, * freely but in fact, IO is going on so finally could cause * use-after-free when the IO is really done. 
*/ - zram->disk->queue->backing_dev_info->capabilities &= - ~BDI_CAP_SYNCHRONOUS_IO; + zram->disk->fops = &zram_wb_devops; up_write(&zram->init_lock); pr_info("setup backing device %s\n", file_name); @@ -544,8 +530,7 @@ static ssize_t backing_dev_store(struct device *dev, return len; out: - if (bitmap) - kvfree(bitmap); + kvfree(bitmap); if (bdev) blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); @@ -625,38 +610,41 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, return 1; } +#define PAGE_WB_SIG "page_index=" + +#define PAGE_WRITEBACK 0 #define HUGE_WRITEBACK 1 #define IDLE_WRITEBACK 2 + static ssize_t writeback_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct zram *zram = dev_to_zram(dev); unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; - unsigned long index; + unsigned long index = 0; struct bio bio; struct bio_vec bio_vec; struct page *page; - ssize_t ret, sz; - char mode_buf[8]; - int mode = -1; + ssize_t ret = len; + int mode, err; unsigned long blk_idx = 0; - sz = strscpy(mode_buf, buf, sizeof(mode_buf)); - if (sz <= 0) - return -EINVAL; - - /* ignore trailing newline */ - if (mode_buf[sz - 1] == '\n') - mode_buf[sz - 1] = 0x00; - - if (!strcmp(mode_buf, "idle")) + if (sysfs_streq(buf, "idle")) mode = IDLE_WRITEBACK; - else if (!strcmp(mode_buf, "huge")) + else if (sysfs_streq(buf, "huge")) mode = HUGE_WRITEBACK; + else { + if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1)) + return -EINVAL; - if (mode == -1) - return -EINVAL; + if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) || + index >= nr_pages) + return -EINVAL; + + nr_pages = 1; + mode = PAGE_WRITEBACK; + } down_read(&zram->init_lock); if (!init_done(zram)) { @@ -675,7 +663,7 @@ static ssize_t writeback_store(struct device *dev, goto release_init_lock; } - for (index = 0; index < nr_pages; index++) { + for (; nr_pages != 0; index++, nr_pages--) { struct bio_vec bvec; bvec.bv_page = page; @@ -740,12 +728,17 
@@ static ssize_t writeback_store(struct device *dev, * XXX: A single page IO would be inefficient for write * but it would be not bad as starter. */ - ret = submit_bio_wait(&bio); - if (ret) { + err = submit_bio_wait(&bio); + if (err) { zram_slot_lock(zram, index); zram_clear_flag(zram, index, ZRAM_UNDER_WB); zram_clear_flag(zram, index, ZRAM_IDLE); zram_slot_unlock(zram, index); + /* + * Return last IO error unless every IO were + * not suceeded. + */ + ret = err; continue; } @@ -783,7 +776,6 @@ static ssize_t writeback_store(struct device *dev, if (blk_idx) free_block_bdev(zram, blk_idx); - ret = len; __free_page(page); release_init_lock: up_read(&zram->init_lock); @@ -811,9 +803,9 @@ static void zram_sync_read(struct work_struct *work) } /* - * Block layer want one ->make_request_fn to be active at a time - * so if we use chained IO with parent IO in same context, - * it's a deadlock. To avoid, it, it uses worker thread context. + * Block layer want one ->submit_bio to be active at a time, so if we use + * chained IO with parent IO in same context, it's a deadlock. To avoid that, + * use a worker thread context. 
*/ static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, unsigned long entry, struct bio *bio) @@ -1031,41 +1023,6 @@ static ssize_t comp_algorithm_store(struct device *dev, return len; } -static ssize_t use_dedup_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - bool val; - struct zram *zram = dev_to_zram(dev); - - down_read(&zram->init_lock); - val = zram->use_dedup; - up_read(&zram->init_lock); - - return scnprintf(buf, PAGE_SIZE, "%d\n", (int)val); -} - -#ifdef CONFIG_ZRAM_DEDUP -static ssize_t use_dedup_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - int val; - struct zram *zram = dev_to_zram(dev); - - if (kstrtoint(buf, 10, &val) || (val != 0 && val != 1)) - return -EINVAL; - - down_write(&zram->init_lock); - if (init_done(zram)) { - up_write(&zram->init_lock); - pr_info("Can't change dedup usage for initialized device\n"); - return -EBUSY; - } - zram->use_dedup = val; - up_write(&zram->init_lock); - return len; -} -#endif - static ssize_t compact_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -1122,7 +1079,7 @@ static ssize_t mm_stat_show(struct device *dev, max_used = atomic_long_read(&zram->stats.max_used_pages); ret = scnprintf(buf, PAGE_SIZE, - "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu %8llu\n", + "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n", orig_size << PAGE_SHIFT, (u64)atomic64_read(&zram->stats.compr_data_size), mem_used << PAGE_SHIFT, @@ -1131,8 +1088,7 @@ static ssize_t mm_stat_show(struct device *dev, (u64)atomic64_read(&zram->stats.same_pages), pool_stats.pages_compacted, (u64)atomic64_read(&zram->stats.huge_pages), - zram_dedup_dup_size(zram), - zram_dedup_meta_size(zram)); + (u64)atomic64_read(&zram->stats.huge_pages_since)); up_read(&zram->init_lock); return ret; @@ -1183,56 +1139,6 @@ static DEVICE_ATTR_RO(bd_stat); #endif static DEVICE_ATTR_RO(debug_stat); -static unsigned long 
zram_entry_handle(struct zram *zram, - struct zram_entry *entry) -{ - if (zram_dedup_enabled(zram)) - return entry->handle; - else - return (unsigned long)entry; -} - -static struct zram_entry *zram_entry_alloc(struct zram *zram, - unsigned int len, gfp_t flags) -{ - struct zram_entry *entry; - unsigned long handle; - - handle = zs_malloc(zram->mem_pool, len, flags); - if (!handle) - return NULL; - - if (!zram_dedup_enabled(zram)) - return (struct zram_entry *)handle; - - entry = kzalloc(sizeof(*entry), - flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA)); - if (!entry) { - zs_free(zram->mem_pool, handle); - return NULL; - } - - zram_dedup_init_entry(zram, entry, handle, len); - atomic64_add(sizeof(*entry), &zram->stats.meta_data_size); - - return entry; -} - -void zram_entry_free(struct zram *zram, struct zram_entry *entry) -{ - if (!zram_dedup_put_entry(zram, entry)) - return; - - zs_free(zram->mem_pool, zram_entry_handle(zram, entry)); - - if (!zram_dedup_enabled(zram)) - return; - - kfree(entry); - - atomic64_sub(sizeof(*entry), &zram->stats.meta_data_size); -} - static void zram_meta_free(struct zram *zram, u64 disksize) { size_t num_pages = disksize >> PAGE_SHIFT; @@ -1243,7 +1149,6 @@ static void zram_meta_free(struct zram *zram, u64 disksize) zram_free_page(zram, index); zs_destroy_pool(zram->mem_pool); - zram_dedup_fini(zram); vfree(zram->table); } @@ -1264,13 +1169,6 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) if (!huge_class_size) huge_class_size = zs_huge_class_size(zram->mem_pool); - - if (zram_dedup_init(zram, num_pages)) { - vfree(zram->table); - zs_destroy_pool(zram->mem_pool); - return false; - } - return true; } @@ -1281,7 +1179,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) */ static void zram_free_page(struct zram *zram, size_t index) { - struct zram_entry *entry; + unsigned long handle; #ifdef CONFIG_ZRAM_MEMORY_TRACKING zram->table[index].ac_time = 0; @@ -1310,17 +1208,17 @@ static void 
zram_free_page(struct zram *zram, size_t index) goto out; } - entry = zram_get_entry(zram, index); - if (!entry) + handle = zram_get_handle(zram, index); + if (!handle) return; - zram_entry_free(zram, entry); + zs_free(zram->mem_pool, handle); atomic64_sub(zram_get_obj_size(zram, index), &zram->stats.compr_data_size); out: atomic64_dec(&zram->stats.pages_stored); - zram_set_entry(zram, index, NULL); + zram_set_handle(zram, index, 0); zram_set_obj_size(zram, index, 0); WARN_ON_ONCE(zram->table[index].flags & ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB)); @@ -1329,10 +1227,11 @@ static void zram_free_page(struct zram *zram, size_t index) static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, struct bio *bio, bool partial_io) { - int ret; - struct zram_entry *entry; + struct zcomp_strm *zstrm; + unsigned long handle; unsigned int size; void *src, *dst; + int ret; zram_slot_lock(zram, index); if (zram_test_flag(zram, index, ZRAM_WB)) { @@ -1348,12 +1247,12 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, bio, partial_io); } - entry = zram_get_entry(zram, index); - if (!entry || zram_test_flag(zram, index, ZRAM_SAME)) { + handle = zram_get_handle(zram, index); + if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) { unsigned long value; void *mem; - value = entry ? zram_get_element(zram, index) : 0; + value = handle ? 
zram_get_element(zram, index) : 0; mem = kmap_atomic(page); zram_fill_page(mem, PAGE_SIZE, value); kunmap_atomic(mem); @@ -1363,26 +1262,26 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, size = zram_get_obj_size(zram, index); - src = zs_map_object(zram->mem_pool, - zram_entry_handle(zram, entry), ZS_MM_RO); + if (size != PAGE_SIZE) + zstrm = zcomp_stream_get(zram->comp); + + src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); if (size == PAGE_SIZE) { dst = kmap_atomic(page); memcpy(dst, src, PAGE_SIZE); kunmap_atomic(dst); ret = 0; } else { - struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); - dst = kmap_atomic(page); ret = zcomp_decompress(zstrm, src, size, dst); kunmap_atomic(dst); zcomp_stream_put(zram->comp); } - zs_unmap_object(zram->mem_pool, zram_entry_handle(zram, entry)); + zs_unmap_object(zram->mem_pool, handle); zram_slot_unlock(zram, index); /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret)) + if (WARN_ON(ret)) pr_err("Decompression failed! err=%d, page=%u\n", ret, index); return ret; @@ -1426,12 +1325,11 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, { int ret = 0; unsigned long alloced_pages; - struct zram_entry *entry = NULL; + unsigned long handle = 0; unsigned int comp_len = 0; void *src, *dst, *mem; struct zcomp_strm *zstrm; struct page *page = bvec->bv_page; - u32 checksum; unsigned long element = 0; enum zram_pageflags flags = 0; @@ -1445,12 +1343,6 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, } kunmap_atomic(mem); - entry = zram_dedup_find(zram, page, &checksum); - if (entry) { - comp_len = entry->len; - goto out; - } - compress_again: zstrm = zcomp_stream_get(zram->comp); src = kmap_atomic(page); @@ -1460,40 +1352,38 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, if (unlikely(ret)) { zcomp_stream_put(zram->comp); pr_err("Compression failed! 
err=%d\n", ret); - if (entry) - zram_entry_free(zram, entry); + zs_free(zram->mem_pool, handle); return ret; } if (comp_len >= huge_class_size) comp_len = PAGE_SIZE; /* - * entry allocation has 2 paths: + * handle allocation has 2 paths: * a) fast path is executed with preemption disabled (for * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear, * since we can't sleep; * b) slow path enables preemption and attempts to allocate * the page with __GFP_DIRECT_RECLAIM bit set. we have to * put per-cpu compression stream and, thus, to re-do - * the compression once entry is allocated. + * the compression once handle is allocated. * - * if we have a 'non-null' entry here then we are coming - * from the slow path and entry has already been allocated. + * if we have a 'non-null' handle here then we are coming + * from the slow path and handle has already been allocated. */ - if (!entry) - entry = zram_entry_alloc(zram, comp_len, + if (!handle) + handle = zs_malloc(zram->mem_pool, comp_len, __GFP_KSWAPD_RECLAIM | __GFP_NOWARN | __GFP_HIGHMEM | - __GFP_MOVABLE | - __GFP_CMA); - if (!entry) { + __GFP_MOVABLE); + if (!handle) { zcomp_stream_put(zram->comp); atomic64_inc(&zram->stats.writestall); - entry = zram_entry_alloc(zram, comp_len, + handle = zs_malloc(zram->mem_pool, comp_len, GFP_NOIO | __GFP_HIGHMEM | - __GFP_MOVABLE | __GFP_CMA); - if (entry) + __GFP_MOVABLE); + if (handle) goto compress_again; return -ENOMEM; } @@ -1503,12 +1393,11 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, if (zram->limit_pages && alloced_pages > zram->limit_pages) { zcomp_stream_put(zram->comp); - zram_entry_free(zram, entry); + zs_free(zram->mem_pool, handle); return -ENOMEM; } - dst = zs_map_object(zram->mem_pool, - zram_entry_handle(zram, entry), ZS_MM_WO); + dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); src = zstrm->buffer; if (comp_len == PAGE_SIZE) @@ -1518,9 +1407,8 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, 
kunmap_atomic(src); zcomp_stream_put(zram->comp); - zs_unmap_object(zram->mem_pool, zram_entry_handle(zram, entry)); + zs_unmap_object(zram->mem_pool, handle); atomic64_add(comp_len, &zram->stats.compr_data_size); - zram_dedup_insert(zram, entry, checksum); out: /* * Free memory associated with this sector @@ -1532,13 +1420,14 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, if (comp_len == PAGE_SIZE) { zram_set_flag(zram, index, ZRAM_HUGE); atomic64_inc(&zram->stats.huge_pages); + atomic64_inc(&zram->stats.huge_pages_since); } if (flags) { zram_set_flag(zram, index, flags); zram_set_element(zram, index, element); } else { - zram_set_entry(zram, index, entry); + zram_set_handle(zram, index, handle); zram_set_obj_size(zram, index, comp_len); } zram_slot_unlock(zram, index); @@ -1635,13 +1524,8 @@ static void zram_bio_discard(struct zram *zram, u32 index, static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, unsigned int op, struct bio *bio) { - unsigned long start_time = jiffies; - struct request_queue *q = zram->disk->queue; int ret; - generic_start_io_acct(q, op, bvec->bv_len >> SECTOR_SHIFT, - &zram->disk->part0); - if (!op_is_write(op)) { atomic64_inc(&zram->stats.num_reads); ret = zram_bvec_read(zram, bvec, index, offset, bio); @@ -1651,8 +1535,6 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, ret = zram_bvec_write(zram, bvec, index, offset, bio); } - generic_end_io_acct(q, op, &zram->disk->part0, start_time); - zram_slot_lock(zram, index); zram_accessed(zram, index); zram_slot_unlock(zram, index); @@ -1673,6 +1555,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) u32 index; struct bio_vec bvec; struct bvec_iter iter; + unsigned long start_time; index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; offset = (bio->bi_iter.bi_sector & @@ -1688,6 +1571,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) break; } + start_time = 
bio_start_io_acct(bio); bio_for_each_segment(bvec, bio, iter) { struct bio_vec bv = bvec; unsigned int unwritten = bvec.bv_len; @@ -1696,8 +1580,10 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, unwritten); if (zram_bvec_rw(zram, &bv, index, offset, - bio_op(bio), bio) < 0) - goto out; + bio_op(bio), bio) < 0) { + bio->bi_status = BLK_STS_IOERR; + break; + } bv.bv_offset += bv.bv_len; unwritten -= bv.bv_len; @@ -1705,20 +1591,16 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) update_position(&index, &offset, &bv); } while (unwritten); } - + bio_end_io_acct(bio, start_time); bio_endio(bio); - return; - -out: - bio_io_error(bio); } /* * Handler function for all zram I/O requests. */ -static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio) +static blk_qc_t zram_submit_bio(struct bio *bio) { - struct zram *zram = queue->queuedata; + struct zram *zram = bio->bi_bdev->bd_disk->private_data; if (!valid_io_request(zram, bio->bi_iter.bi_sector, bio->bi_iter.bi_size)) { @@ -1758,6 +1640,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, u32 index; struct zram *zram; struct bio_vec bv; + unsigned long start_time; if (PageTransHuge(page)) return -ENOTSUPP; @@ -1776,7 +1659,9 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, bv.bv_len = PAGE_SIZE; bv.bv_offset = 0; + start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op); ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL); + disk_end_io_acct(bdev->bd_disk, op, start_time); out: /* * If I/O fails, just return error(ie, non-zero) without @@ -1820,8 +1705,8 @@ static void zram_reset_device(struct zram *zram) disksize = zram->disksize; zram->disksize = 0; - set_capacity(zram->disk, 0); - part_stat_set_all(&zram->disk->part0, 0); + set_capacity_and_notify(zram->disk, 0); + part_stat_set_all(zram->disk->part0, 0); up_write(&zram->init_lock); /* I/O 
operation under all of CPU are done so let's free */ @@ -1866,9 +1751,7 @@ static ssize_t disksize_store(struct device *dev, zram->comp = comp; zram->disksize = disksize; - set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - - revalidate_disk(zram->disk); + set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT); up_write(&zram->init_lock); return len; @@ -1896,31 +1779,26 @@ static ssize_t reset_store(struct device *dev, return -EINVAL; zram = dev_to_zram(dev); - bdev = bdget_disk(zram->disk, 0); - if (!bdev) - return -ENOMEM; + bdev = zram->disk->part0; - mutex_lock(&bdev->bd_mutex); + mutex_lock(&bdev->bd_disk->open_mutex); /* Do not reset an active device or claimed device */ if (bdev->bd_openers || zram->claim) { - mutex_unlock(&bdev->bd_mutex); - bdput(bdev); + mutex_unlock(&bdev->bd_disk->open_mutex); return -EBUSY; } /* From now on, anyone can't open /dev/zram[0-9] */ zram->claim = true; - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&bdev->bd_disk->open_mutex); /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); - revalidate_disk(zram->disk); - bdput(bdev); - mutex_lock(&bdev->bd_mutex); + mutex_lock(&bdev->bd_disk->open_mutex); zram->claim = false; - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&bdev->bd_disk->open_mutex); return len; } @@ -1930,7 +1808,7 @@ static int zram_open(struct block_device *bdev, fmode_t mode) int ret = 0; struct zram *zram; - WARN_ON(!mutex_is_locked(&bdev->bd_mutex)); + WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex)); zram = bdev->bd_disk->private_data; /* zram was claimed to reset so open request fails */ @@ -1942,11 +1820,19 @@ static int zram_open(struct block_device *bdev, fmode_t mode) static const struct block_device_operations zram_devops = { .open = zram_open, + .submit_bio = zram_submit_bio, .swap_slot_free_notify = zram_slot_free_notify, .rw_page = zram_rw_page, .owner = THIS_MODULE }; +static const struct block_device_operations zram_wb_devops = { + 
.open = zram_open, + .submit_bio = zram_submit_bio, + .swap_slot_free_notify = zram_slot_free_notify, + .owner = THIS_MODULE +}; + static DEVICE_ATTR_WO(compact); static DEVICE_ATTR_RW(disksize); static DEVICE_ATTR_RO(initstate); @@ -1962,11 +1848,6 @@ static DEVICE_ATTR_WO(writeback); static DEVICE_ATTR_RW(writeback_limit); static DEVICE_ATTR_RW(writeback_limit_enable); #endif -#ifdef CONFIG_ZRAM_DEDUP -static DEVICE_ATTR_RW(use_dedup); -#else -static DEVICE_ATTR_RO(use_dedup); -#endif static struct attribute *zram_disk_attrs[] = { &dev_attr_disksize.attr, @@ -1984,7 +1865,6 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_writeback_limit.attr, &dev_attr_writeback_limit_enable.attr, #endif - &dev_attr_use_dedup.attr, &dev_attr_io_stat.attr, &dev_attr_mm_stat.attr, #ifdef CONFIG_ZRAM_WRITEBACK @@ -2010,7 +1890,6 @@ static const struct attribute_group *zram_disk_attr_groups[] = { static int zram_add(void) { struct zram *zram; - struct request_queue *queue; int ret, device_id; zram = kzalloc(sizeof(struct zram), GFP_KERNEL); @@ -2026,30 +1905,20 @@ static int zram_add(void) #ifdef CONFIG_ZRAM_WRITEBACK spin_lock_init(&zram->wb_limit_lock); #endif - queue = blk_alloc_queue(GFP_KERNEL); - if (!queue) { - pr_err("Error allocating disk queue for device %d\n", - device_id); - ret = -ENOMEM; - goto out_free_idr; - } - - blk_queue_make_request(queue, zram_make_request); /* gendisk structure */ - zram->disk = alloc_disk(1); + zram->disk = blk_alloc_disk(NUMA_NO_NODE); if (!zram->disk) { pr_err("Error allocating disk structure for device %d\n", device_id); ret = -ENOMEM; - goto out_free_queue; + goto out_free_idr; } zram->disk->major = zram_major; zram->disk->first_minor = device_id; + zram->disk->minors = 1; zram->disk->fops = &zram_devops; - zram->disk->queue = queue; - zram->disk->queue->queuedata = zram; zram->disk->private_data = zram; snprintf(zram->disk->disk_name, 16, "zram%d", device_id); @@ -2083,10 +1952,8 @@ static int zram_add(void) if 
(ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); - zram->disk->queue->backing_dev_info->capabilities |= - (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO); - disk_to_dev(zram->disk)->groups = zram_disk_attr_groups; - add_disk(zram->disk); + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); + device_add_disk(NULL, zram->disk, zram_disk_attr_groups); strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); @@ -2094,8 +1961,6 @@ static int zram_add(void) pr_info("Added device: %s\n", zram->disk->disk_name); return device_id; -out_free_queue: - blk_cleanup_queue(queue); out_free_idr: idr_remove(&zram_index_idr, device_id); out_free_dev: @@ -2105,33 +1970,27 @@ static int zram_add(void) static int zram_remove(struct zram *zram) { - struct block_device *bdev; + struct block_device *bdev = zram->disk->part0; - bdev = bdget_disk(zram->disk, 0); - if (!bdev) - return -ENOMEM; - - mutex_lock(&bdev->bd_mutex); + mutex_lock(&bdev->bd_disk->open_mutex); if (bdev->bd_openers || zram->claim) { - mutex_unlock(&bdev->bd_mutex); - bdput(bdev); + mutex_unlock(&bdev->bd_disk->open_mutex); return -EBUSY; } zram->claim = true; - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&bdev->bd_disk->open_mutex); zram_debugfs_unregister(zram); + /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); - bdput(bdev); pr_info("Removed device: %s\n", zram->disk->disk_name); del_gendisk(zram->disk); - blk_cleanup_queue(zram->disk->queue); - put_disk(zram->disk); + blk_cleanup_disk(zram->disk); kfree(zram); return 0; } diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 22f8366d7971..80c3b43b4828 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -18,10 +18,8 @@ #include #include #include -#include #include "zcomp.h" -#include "zram_dedup.h" #define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) #define SECTORS_PER_PAGE 
(1 << SECTORS_PER_PAGE_SHIFT) @@ -58,18 +56,10 @@ enum zram_pageflags { /*-- Data structures */ -struct zram_entry { - struct rb_node rb_node; - u32 len; - u32 checksum; - unsigned long refcount; - unsigned long handle; -}; - /* Allocated for each disk page */ struct zram_table_entry { union { - struct zram_entry *entry; + unsigned long handle; unsigned long element; }; unsigned long flags; @@ -88,6 +78,7 @@ struct zram_stats { atomic64_t notify_free; /* no. of swap slot free notifications */ atomic64_t same_pages; /* no. of same element filled pages */ atomic64_t huge_pages; /* no. of huge pages */ + atomic64_t huge_pages_since; /* no. of huge pages since zram set up */ atomic64_t pages_stored; /* no. of pages currently stored */ atomic_long_t max_used_pages; /* no. of maximum pages stored */ atomic64_t writestall; /* no. of write slow paths */ @@ -97,16 +88,6 @@ struct zram_stats { atomic64_t bd_reads; /* no. of reads from backing device */ atomic64_t bd_writes; /* no. of writes from backing device */ #endif - atomic64_t dup_data_size; /* - * compressed size of pages - * duplicated - */ - atomic64_t meta_data_size; /* size of zram_entries */ -}; - -struct zram_hash { - spinlock_t lock; - struct rb_root rb_root; }; struct zram { @@ -114,8 +95,6 @@ struct zram { struct zs_pool *mem_pool; struct zcomp *comp; struct gendisk *disk; - struct zram_hash *hash; - size_t hash_size; /* Prevent concurrent execution of device init */ struct rw_semaphore init_lock; /* @@ -133,15 +112,13 @@ struct zram { /* * zram is claimed so open request will be failed */ - bool claim; /* Protected by bdev->bd_mutex */ - bool use_dedup; - struct file *backing_dev; + bool claim; /* Protected by disk->open_mutex */ #ifdef CONFIG_ZRAM_WRITEBACK + struct file *backing_dev; spinlock_t wb_limit_lock; bool wb_limit_enable; u64 bd_wb_limit; struct block_device *bdev; - unsigned int old_block_size; unsigned long *bitmap; unsigned long nr_pages; #endif @@ -149,15 +126,4 @@ struct zram { struct dentry 
*debugfs_dir; #endif }; - -static inline bool zram_dedup_enabled(struct zram *zram) -{ -#ifdef CONFIG_ZRAM_DEDUP - return zram->use_dedup; -#else - return false; -#endif -} - -void zram_entry_free(struct zram *zram, struct zram_entry *entry); #endif From b9a3d0cb8a436d249b2f4e05ad95f8bd5c71cb6e Mon Sep 17 00:00:00 2001 From: Juhyung Park Date: Wed, 22 Sep 2021 01:06:39 +0900 Subject: [PATCH 19/44] zram: fix build Signed-off-by: Juhyung Park Signed-off-by: UtsavBalar1231 --- drivers/block/zram/zcomp.c | 7 +- drivers/block/zram/zcomp.h | 3 - drivers/block/zram/zram_drv.c | 117 ++++++++++++++++++++-------------- drivers/block/zram/zram_drv.h | 2 +- 4 files changed, 73 insertions(+), 56 deletions(-) diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 052aa3f65514..f41293160e32 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -111,13 +111,12 @@ ssize_t zcomp_available_show(const char *comp, char *buf) struct zcomp_strm *zcomp_stream_get(struct zcomp *comp) { - local_lock(&comp->stream->lock); - return this_cpu_ptr(comp->stream); + return get_cpu_ptr(comp->stream); } void zcomp_stream_put(struct zcomp *comp) { - local_unlock(&comp->stream->lock); + put_cpu_ptr(comp->stream); } int zcomp_compress(struct zcomp_strm *zstrm, @@ -161,8 +160,6 @@ int zcomp_cpu_up_prepare(unsigned int cpu, struct hlist_node *node) int ret; zstrm = per_cpu_ptr(comp->stream, cpu); - local_lock_init(&zstrm->lock); - ret = zcomp_strm_init(zstrm, comp); if (ret) pr_err("Can't allocate a compression stream\n"); diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index 40f6420f4b2e..72c2ee4d843e 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -5,11 +5,8 @@ #ifndef _ZCOMP_H_ #define _ZCOMP_H_ -#include struct zcomp_strm { - /* The members ->buffer and ->tfm are protected by ->lock. 
*/ - local_lock_t lock; /* compression/decompression buffer */ void *buffer; struct crypto_comp *tfm; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 2f3b1af107ee..f911d21687e0 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -33,7 +33,6 @@ #include #include #include -#include #include "zram_drv.h" @@ -52,9 +51,6 @@ static unsigned int num_devices = 1; */ static size_t huge_class_size; -static const struct block_device_operations zram_devops; -static const struct block_device_operations zram_wb_devops; - static void zram_free_page(struct zram *zram, size_t index); static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio); @@ -408,7 +404,8 @@ static void reset_bdev(struct zram *zram) filp_close(zram->backing_dev, NULL); zram->backing_dev = NULL; zram->bdev = NULL; - zram->disk->fops = &zram_devops; + zram->disk->queue->backing_dev_info->capabilities |= + BDI_CAP_SYNCHRONOUS_IO; kvfree(zram->bitmap); zram->bitmap = NULL; } @@ -522,7 +519,8 @@ static ssize_t backing_dev_store(struct device *dev, * freely but in fact, IO is going on so finally could cause * use-after-free when the IO is really done. */ - zram->disk->fops = &zram_wb_devops; + zram->disk->queue->backing_dev_info->capabilities &= + ~BDI_CAP_SYNCHRONOUS_IO; up_write(&zram->init_lock); pr_info("setup backing device %s\n", file_name); @@ -803,9 +801,9 @@ static void zram_sync_read(struct work_struct *work) } /* - * Block layer want one ->submit_bio to be active at a time, so if we use - * chained IO with parent IO in same context, it's a deadlock. To avoid that, - * use a worker thread context. + * Block layer want one ->make_request_fn to be active at a time + * so if we use chained IO with parent IO in same context, + * it's a deadlock. To avoid, it, it uses worker thread context. 
*/ static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, unsigned long entry, struct bio *bio) @@ -1524,8 +1522,13 @@ static void zram_bio_discard(struct zram *zram, u32 index, static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, unsigned int op, struct bio *bio) { + unsigned long start_time = jiffies; + struct request_queue *q = zram->disk->queue; int ret; + generic_start_io_acct(q, op, bvec->bv_len >> SECTOR_SHIFT, + &zram->disk->part0); + if (!op_is_write(op)) { atomic64_inc(&zram->stats.num_reads); ret = zram_bvec_read(zram, bvec, index, offset, bio); @@ -1535,6 +1538,8 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, ret = zram_bvec_write(zram, bvec, index, offset, bio); } + generic_end_io_acct(q, op, &zram->disk->part0, start_time); + zram_slot_lock(zram, index); zram_accessed(zram, index); zram_slot_unlock(zram, index); @@ -1555,7 +1560,6 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) u32 index; struct bio_vec bvec; struct bvec_iter iter; - unsigned long start_time; index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; offset = (bio->bi_iter.bi_sector & @@ -1571,7 +1575,6 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) break; } - start_time = bio_start_io_acct(bio); bio_for_each_segment(bvec, bio, iter) { struct bio_vec bv = bvec; unsigned int unwritten = bvec.bv_len; @@ -1580,10 +1583,8 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, unwritten); if (zram_bvec_rw(zram, &bv, index, offset, - bio_op(bio), bio) < 0) { - bio->bi_status = BLK_STS_IOERR; - break; - } + bio_op(bio), bio) < 0) + goto out; bv.bv_offset += bv.bv_len; unwritten -= bv.bv_len; @@ -1591,16 +1592,20 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) update_position(&index, &offset, &bv); } while (unwritten); } - bio_end_io_acct(bio, start_time); + bio_endio(bio); + 
return; + +out: + bio_io_error(bio); } /* * Handler function for all zram I/O requests. */ -static blk_qc_t zram_submit_bio(struct bio *bio) +static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio) { - struct zram *zram = bio->bi_bdev->bd_disk->private_data; + struct zram *zram = bio->bi_disk->private_data; if (!valid_io_request(zram, bio->bi_iter.bi_sector, bio->bi_iter.bi_size)) { @@ -1640,7 +1645,6 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, u32 index; struct zram *zram; struct bio_vec bv; - unsigned long start_time; if (PageTransHuge(page)) return -ENOTSUPP; @@ -1659,9 +1663,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, bv.bv_len = PAGE_SIZE; bv.bv_offset = 0; - start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op); ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL); - disk_end_io_acct(bdev->bd_disk, op, start_time); out: /* * If I/O fails, just return error(ie, non-zero) without @@ -1705,8 +1707,8 @@ static void zram_reset_device(struct zram *zram) disksize = zram->disksize; zram->disksize = 0; - set_capacity_and_notify(zram->disk, 0); - part_stat_set_all(zram->disk->part0, 0); + set_capacity(zram->disk, 0); + part_stat_set_all(&zram->disk->part0, 0); up_write(&zram->init_lock); /* I/O operation under all of CPU are done so let's free */ @@ -1751,7 +1753,9 @@ static ssize_t disksize_store(struct device *dev, zram->comp = comp; zram->disksize = disksize; - set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT); + set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); + + revalidate_disk(zram->disk); up_write(&zram->init_lock); return len; @@ -1779,26 +1783,31 @@ static ssize_t reset_store(struct device *dev, return -EINVAL; zram = dev_to_zram(dev); - bdev = zram->disk->part0; + bdev = bdget_disk(zram->disk, 0); + if (!bdev) + return -ENOMEM; - mutex_lock(&bdev->bd_disk->open_mutex); + mutex_lock(&bdev->bd_mutex); /* Do not reset an active device or 
claimed device */ if (bdev->bd_openers || zram->claim) { - mutex_unlock(&bdev->bd_disk->open_mutex); + mutex_unlock(&bdev->bd_mutex); + bdput(bdev); return -EBUSY; } /* From now on, anyone can't open /dev/zram[0-9] */ zram->claim = true; - mutex_unlock(&bdev->bd_disk->open_mutex); + mutex_unlock(&bdev->bd_mutex); /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); + revalidate_disk(zram->disk); + bdput(bdev); - mutex_lock(&bdev->bd_disk->open_mutex); + mutex_lock(&bdev->bd_mutex); zram->claim = false; - mutex_unlock(&bdev->bd_disk->open_mutex); + mutex_unlock(&bdev->bd_mutex); return len; } @@ -1808,7 +1817,7 @@ static int zram_open(struct block_device *bdev, fmode_t mode) int ret = 0; struct zram *zram; - WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex)); + WARN_ON(!mutex_is_locked(&bdev->bd_mutex)); zram = bdev->bd_disk->private_data; /* zram was claimed to reset so open request fails */ @@ -1820,19 +1829,11 @@ static int zram_open(struct block_device *bdev, fmode_t mode) static const struct block_device_operations zram_devops = { .open = zram_open, - .submit_bio = zram_submit_bio, .swap_slot_free_notify = zram_slot_free_notify, .rw_page = zram_rw_page, .owner = THIS_MODULE }; -static const struct block_device_operations zram_wb_devops = { - .open = zram_open, - .submit_bio = zram_submit_bio, - .swap_slot_free_notify = zram_slot_free_notify, - .owner = THIS_MODULE -}; - static DEVICE_ATTR_WO(compact); static DEVICE_ATTR_RW(disksize); static DEVICE_ATTR_RO(initstate); @@ -1890,6 +1891,7 @@ static const struct attribute_group *zram_disk_attr_groups[] = { static int zram_add(void) { struct zram *zram; + struct request_queue *queue; int ret, device_id; zram = kzalloc(sizeof(struct zram), GFP_KERNEL); @@ -1905,20 +1907,31 @@ static int zram_add(void) #ifdef CONFIG_ZRAM_WRITEBACK spin_lock_init(&zram->wb_limit_lock); #endif + queue = blk_alloc_queue(GFP_KERNEL); + if (!queue) { + pr_err("Error allocating disk queue for device 
%d\n", + device_id); + ret = -ENOMEM; + goto out_free_idr; + } + + blk_queue_make_request(queue, zram_make_request); /* gendisk structure */ - zram->disk = blk_alloc_disk(NUMA_NO_NODE); + zram->disk = alloc_disk(1); if (!zram->disk) { pr_err("Error allocating disk structure for device %d\n", device_id); ret = -ENOMEM; - goto out_free_idr; + goto out_free_queue; } zram->disk->major = zram_major; zram->disk->first_minor = device_id; zram->disk->minors = 1; zram->disk->fops = &zram_devops; + zram->disk->queue = queue; + zram->disk->queue->queuedata = zram; zram->disk->private_data = zram; snprintf(zram->disk->disk_name, 16, "zram%d", device_id); @@ -1952,7 +1965,8 @@ static int zram_add(void) if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); + zram->disk->queue->backing_dev_info->capabilities |= + (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO); device_add_disk(NULL, zram->disk, zram_disk_attr_groups); strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); @@ -1961,6 +1975,8 @@ static int zram_add(void) pr_info("Added device: %s\n", zram->disk->disk_name); return device_id; +out_free_queue: + blk_cleanup_queue(queue); out_free_idr: idr_remove(&zram_index_idr, device_id); out_free_dev: @@ -1970,27 +1986,34 @@ static int zram_add(void) static int zram_remove(struct zram *zram) { - struct block_device *bdev = zram->disk->part0; + struct block_device *bdev; + + bdev = bdget_disk(zram->disk, 0); + if (!bdev) + return -ENOMEM; - mutex_lock(&bdev->bd_disk->open_mutex); + mutex_lock(&bdev->bd_mutex); if (bdev->bd_openers || zram->claim) { - mutex_unlock(&bdev->bd_disk->open_mutex); + mutex_unlock(&bdev->bd_mutex); + bdput(bdev); return -EBUSY; } zram->claim = true; - mutex_unlock(&bdev->bd_disk->open_mutex); + mutex_unlock(&bdev->bd_mutex); zram_debugfs_unregister(zram); /* Make sure all the pending I/O are finished */ 
fsync_bdev(bdev); zram_reset_device(zram); + bdput(bdev); pr_info("Removed device: %s\n", zram->disk->disk_name); del_gendisk(zram->disk); - blk_cleanup_disk(zram->disk); + blk_cleanup_queue(zram->disk->queue); + put_disk(zram->disk); kfree(zram); return 0; } diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 80c3b43b4828..6e73dc3c2769 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -112,7 +112,7 @@ struct zram { /* * zram is claimed so open request will be failed */ - bool claim; /* Protected by disk->open_mutex */ + bool claim; /* Protected by bdev->bd_mutex */ #ifdef CONFIG_ZRAM_WRITEBACK struct file *backing_dev; spinlock_t wb_limit_lock; From b17888a482c405aca3bd8f4a52ef809d9e74f285 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Fri, 12 May 2017 11:30:01 +0900 Subject: [PATCH 20/44] zram: introduce zram_entry to prepare dedup functionality Following patch will implement deduplication functionality in the zram and it requires an indirection layer to manage the life cycle of zsmalloc handle. To prepare that, this patch introduces zram_entry which can be used to manage the life-cycle of zsmalloc handle. Many lines are changed due to rename but core change is just simple introduction about newly data structure. 
Change-Id: Ibf9912397c8c7dbcf1465550bc83a71f904e41c7 Reviewed-by: Sergey Senozhatsky Acked-by: Minchan Kim Signed-off-by: Joonsoo Kim Link: https://lore.kernel.org/patchwork/patch/787161/ Patch-mainline: linux-kernel@ Thu, 11 May 2017 22:30:21 Signed-off-by: Charan Teja Reddy Signed-off-by: Park Ju Hyung Signed-off-by: UtsavBalar1231 --- drivers/block/zram/zram_drv.c | 88 +++++++++++++++++++++++------------ drivers/block/zram/zram_drv.h | 6 ++- 2 files changed, 63 insertions(+), 31 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f911d21687e0..5ba7a3d4154c 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -81,14 +81,15 @@ static inline struct zram *dev_to_zram(struct device *dev) return (struct zram *)dev_to_disk(dev)->private_data; } -static unsigned long zram_get_handle(struct zram *zram, u32 index) +static struct zram_entry *zram_get_entry(struct zram *zram, u32 index) { - return zram->table[index].handle; + return zram->table[index].entry; } -static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) +static void zram_set_entry(struct zram *zram, u32 index, + struct zram_entry *entry) { - zram->table[index].handle = handle; + zram->table[index].entry = entry; } /* flag operations require table entry bit_spin_lock() being held */ @@ -1137,6 +1138,32 @@ static DEVICE_ATTR_RO(bd_stat); #endif static DEVICE_ATTR_RO(debug_stat); +static struct zram_entry *zram_entry_alloc(struct zram *zram, + unsigned int len, gfp_t flags) +{ + struct zram_entry *entry; + + entry = kzalloc(sizeof(*entry), + flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA)); + if (!entry) + return NULL; + + entry->handle = zs_malloc(zram->mem_pool, len, flags); + if (!entry->handle) { + kfree(entry); + return NULL; + } + + return entry; +} + +static inline void zram_entry_free(struct zram *zram, + struct zram_entry *entry) +{ + zs_free(zram->mem_pool, entry->handle); + kfree(entry); +} + static void 
zram_meta_free(struct zram *zram, u64 disksize) { size_t num_pages = disksize >> PAGE_SHIFT; @@ -1177,7 +1204,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) */ static void zram_free_page(struct zram *zram, size_t index) { - unsigned long handle; + struct zram_entry *entry; #ifdef CONFIG_ZRAM_MEMORY_TRACKING zram->table[index].ac_time = 0; @@ -1206,17 +1233,17 @@ static void zram_free_page(struct zram *zram, size_t index) goto out; } - handle = zram_get_handle(zram, index); - if (!handle) + entry = zram_get_entry(zram, index); + if (!entry) return; - zs_free(zram->mem_pool, handle); + zram_entry_free(zram, entry); atomic64_sub(zram_get_obj_size(zram, index), &zram->stats.compr_data_size); out: atomic64_dec(&zram->stats.pages_stored); - zram_set_handle(zram, index, 0); + zram_set_entry(zram, index, NULL); zram_set_obj_size(zram, index, 0); WARN_ON_ONCE(zram->table[index].flags & ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB)); @@ -1226,7 +1253,7 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, struct bio *bio, bool partial_io) { struct zcomp_strm *zstrm; - unsigned long handle; + struct zram_entry *entry; unsigned int size; void *src, *dst; int ret; @@ -1245,12 +1272,12 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, bio, partial_io); } - handle = zram_get_handle(zram, index); - if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) { + entry = zram_get_entry(zram, index); + if (!entry || zram_test_flag(zram, index, ZRAM_SAME)) { unsigned long value; void *mem; - value = handle ? zram_get_element(zram, index) : 0; + value = entry ? 
zram_get_element(zram, index) : 0; mem = kmap_atomic(page); zram_fill_page(mem, PAGE_SIZE, value); kunmap_atomic(mem); @@ -1263,7 +1290,7 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, if (size != PAGE_SIZE) zstrm = zcomp_stream_get(zram->comp); - src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); + src = zs_map_object(zram->mem_pool, entry->handle, ZS_MM_RO); if (size == PAGE_SIZE) { dst = kmap_atomic(page); memcpy(dst, src, PAGE_SIZE); @@ -1275,7 +1302,7 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, kunmap_atomic(dst); zcomp_stream_put(zram->comp); } - zs_unmap_object(zram->mem_pool, handle); + zs_unmap_object(zram->mem_pool, entry->handle); zram_slot_unlock(zram, index); /* Should NEVER happen. Return bio error if it does. */ @@ -1323,7 +1350,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, { int ret = 0; unsigned long alloced_pages; - unsigned long handle = 0; + struct zram_entry *entry = NULL; unsigned int comp_len = 0; void *src, *dst, *mem; struct zcomp_strm *zstrm; @@ -1350,38 +1377,39 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, if (unlikely(ret)) { zcomp_stream_put(zram->comp); pr_err("Compression failed! err=%d\n", ret); - zs_free(zram->mem_pool, handle); + if (entry) + zram_entry_free(zram, entry); return ret; } if (comp_len >= huge_class_size) comp_len = PAGE_SIZE; /* - * handle allocation has 2 paths: + * entry allocation has 2 paths: * a) fast path is executed with preemption disabled (for * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear, * since we can't sleep; * b) slow path enables preemption and attempts to allocate * the page with __GFP_DIRECT_RECLAIM bit set. we have to * put per-cpu compression stream and, thus, to re-do - * the compression once handle is allocated. + * the compression once entry is allocated. 
* - * if we have a 'non-null' handle here then we are coming - * from the slow path and handle has already been allocated. + * if we have a 'non-null' entry here then we are coming + * from the slow path and entry has already been allocated. */ - if (!handle) - handle = zs_malloc(zram->mem_pool, comp_len, + if (!entry) + entry = zram_entry_alloc(zram, comp_len, __GFP_KSWAPD_RECLAIM | __GFP_NOWARN | __GFP_HIGHMEM | __GFP_MOVABLE); - if (!handle) { + if (!entry) { zcomp_stream_put(zram->comp); atomic64_inc(&zram->stats.writestall); - handle = zs_malloc(zram->mem_pool, comp_len, + entry = zram_entry_alloc(zram, comp_len, GFP_NOIO | __GFP_HIGHMEM | __GFP_MOVABLE); - if (handle) + if (entry) goto compress_again; return -ENOMEM; } @@ -1391,11 +1419,11 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, if (zram->limit_pages && alloced_pages > zram->limit_pages) { zcomp_stream_put(zram->comp); - zs_free(zram->mem_pool, handle); + zram_entry_free(zram, entry); return -ENOMEM; } - dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); + dst = zs_map_object(zram->mem_pool, entry->handle, ZS_MM_WO); src = zstrm->buffer; if (comp_len == PAGE_SIZE) @@ -1405,7 +1433,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, kunmap_atomic(src); zcomp_stream_put(zram->comp); - zs_unmap_object(zram->mem_pool, handle); + zs_unmap_object(zram->mem_pool, entry->handle); atomic64_add(comp_len, &zram->stats.compr_data_size); out: /* @@ -1425,7 +1453,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, zram_set_flag(zram, index, flags); zram_set_element(zram, index, element); } else { - zram_set_handle(zram, index, handle); + zram_set_entry(zram, index, entry); zram_set_obj_size(zram, index, comp_len); } zram_slot_unlock(zram, index); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 6e73dc3c2769..c089eaa204ef 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -56,10 
+56,14 @@ enum zram_pageflags { /*-- Data structures */ +struct zram_entry { + unsigned long handle; +}; + /* Allocated for each disk page */ struct zram_table_entry { union { - unsigned long handle; + struct zram_entry *entry; unsigned long element; }; unsigned long flags; From 5db99f4e92af8468493107ec75f4924c4074c9a0 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Fri, 12 May 2017 11:30:02 +0900 Subject: [PATCH 21/44] zram: implement deduplication in zram This patch implements deduplication feature in zram. The purpose of this work is naturally to save amount of memory usage by zram. Android is one of the biggest users to use zram as swap and it's really important to save amount of memory usage. There is a paper that reports that duplication ratio of Android's memory content is rather high [1]. And, there is a similar work on zswap that also reports that experiments has shown that around 10-15% of pages stored in zswp are duplicates and deduplicate them provides some benefits [2]. Also, there is a different kind of workload that uses zram as blockdev and store build outputs into it to reduce wear-out problem of real blockdev. In this workload, deduplication hit is very high due to temporary files and intermediate object files. Detailed analysis is on the bottom of this description. Anyway, if we can detect duplicated content and avoid to store duplicated content at different memory space, we can save memory. This patch tries to do that. Implementation is almost simple and intuitive but I should note one thing about implementation detail. To check duplication, this patch uses checksum of the page and collision of this checksum could be possible. There would be many choices to handle this situation but this patch chooses to allow entry with duplicated checksum to be added to the hash, but, not to compare all entries with duplicated checksum when checking duplication. 
I guess that checksum collision is quite rare event and we don't need to pay any attention to such a case. Therefore, I decided the most simplest way to implement the feature. If there is a different opinion, I can accept and go that way. Following is the result of this patch. Test result #1 (Swap): Android Marshmallow, emulator, x86_64, Backporting to kernel v3.18 orig_data_size: 145297408 compr_data_size: 32408125 mem_used_total: 32276480 dup_data_size: 3188134 meta_data_size: 1444272 Last two metrics added to mm_stat are related to this work. First one, dup_data_size, is amount of saved memory by avoiding to store duplicated page. Later one, meta_data_size, is the amount of data structure to support deduplication. If dup > meta, we can judge that the patch improves memory usage. In Adnroid, we can save 5% of memory usage by this work. Test result #2 (Blockdev): build the kernel and store output to ext4 FS on zram Elapsed time: 249 s mm_stat: 430845952 191014886 196898816 0 196898816 28320 0 0 0 Elapsed time: 250 s mm_stat: 430505984 190971334 148365312 0 148365312 28404 0 47287038 3945792 There is no performance degradation and save 23% memory. Test result #3 (Blockdev): copy android build output dir(out/host) to ext4 FS on zram Elapsed time: out/host: 88 s mm_stat: 8834420736 3658184579 3834208256 0 3834208256 32889 0 0 0 Elapsed time: out/host: 100 s mm_stat: 8832929792 3657329322 2832015360 0 2832015360 32609 0 952568877 80880336 It shows performance degradation roughly 13% and save 24% memory. Maybe, it is due to overhead of calculating checksum and comparison. Test result #4 (Blockdev): copy android build output dir(out/target/common) to ext4 FS on zram Elapsed time: out/host: 203 s mm_stat: 4041678848 2310355010 2346577920 0 2346582016 500 4 0 0 Elapsed time: out/host: 201 s mm_stat: 4041666560 2310488276 1338150912 0 1338150912 476 0 989088794 24564336 Memory is saved by 42% and performance is the same. 
Even if there is overhead of calculating checksum and comparison, large hit ratio compensate it since hit leads to less compression attempt. I checked the detailed reason of savings on kernel build workload and there are some cases that deduplication happens. 1) *.cmd Build command is usually similar in one directory so content of these file are very similar. In my system, more than 789 lines in fs/ext4/.namei.o.cmd and fs/ext4/.inode.o.cmd are the same in 944 and 938 lines of the file, respectively. 2) intermediate object files built-in.o and temporary object file have the similar contents. More than 50% of fs/ext4/ext4.o is the same with fs/ext4/built-in.o. 3) vmlinux .tmp_vmlinux1 and .tmp_vmlinux2 and arch/x86/boo/compressed/vmlinux.bin have the similar contents. Android test has similar case that some of object files(.class and .so) are similar with another ones. (./host/linux-x86/lib/libartd.so and ./host/linux-x86-lib/libartd-comiler.so) Anyway, benefit seems to be largely dependent on the workload so following patch will make this feature optional. However, this feature can help some usecases so is deserved to be merged. 
[1]: MemScope: Analyzing Memory Duplication on Android Systems, dl.acm.org/citation.cfm?id=2797023 [2]: zswap: Optimize compressed pool memory utilization, lkml.kernel.org/r/1341407574.7551.1471584870761.JavaMail.weblogic@epwas3p2 Change-Id: I8fe80c956c33f88a6af337d50d9e210e5c35ce37 Reviewed-by: Sergey Senozhatsky Acked-by: Minchan Kim Signed-off-by: Joonsoo Kim Link: https://lore.kernel.org/patchwork/patch/787162/ Patch-mainline: linux-kernel@ Thu, 11 May 2017 22:30:26 Signed-off-by: Charan Teja Reddy Signed-off-by: Park Ju Hyung Signed-off-by: UtsavBalar1231 --- drivers/block/zram/Makefile | 2 +- drivers/block/zram/zram_dedup.c | 204 ++++++++++++++++++++++++++++++++ drivers/block/zram/zram_dedup.h | 22 ++++ drivers/block/zram/zram_drv.c | 38 +++++- drivers/block/zram/zram_drv.h | 20 ++++ 5 files changed, 279 insertions(+), 7 deletions(-) create mode 100644 drivers/block/zram/zram_dedup.c create mode 100644 drivers/block/zram/zram_dedup.h diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile index de9e457907b1..72cc66ffb5e8 100644 --- a/drivers/block/zram/Makefile +++ b/drivers/block/zram/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -zram-y := zcomp.o zram_drv.o +zram-y := zcomp.o zram_drv.o zram_dedup.o obj-$(CONFIG_ZRAM) += zram.o diff --git a/drivers/block/zram/zram_dedup.c b/drivers/block/zram/zram_dedup.c new file mode 100644 index 000000000000..a8427f75b6ea --- /dev/null +++ b/drivers/block/zram/zram_dedup.c @@ -0,0 +1,204 @@ +/* + * Copyright (C) 2017 Joonsoo Kim. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include + +#include "zram_drv.h" + +/* One slot will contain 128 pages theoretically */ +#define ZRAM_HASH_SHIFT 7 +#define ZRAM_HASH_SIZE_MIN (1 << 10) +#define ZRAM_HASH_SIZE_MAX (1 << 31) + +u64 zram_dedup_dup_size(struct zram *zram) +{ + return (u64)atomic64_read(&zram->stats.dup_data_size); +} + +u64 zram_dedup_meta_size(struct zram *zram) +{ + return (u64)atomic64_read(&zram->stats.meta_data_size); +} + +static u32 zram_dedup_checksum(unsigned char *mem) +{ + return jhash(mem, PAGE_SIZE, 0); +} + +void zram_dedup_insert(struct zram *zram, struct zram_entry *new, + u32 checksum) +{ + struct zram_hash *hash; + struct rb_root *rb_root; + struct rb_node **rb_node, *parent = NULL; + struct zram_entry *entry; + + new->checksum = checksum; + hash = &zram->hash[checksum % zram->hash_size]; + rb_root = &hash->rb_root; + + spin_lock(&hash->lock); + rb_node = &rb_root->rb_node; + while (*rb_node) { + parent = *rb_node; + entry = rb_entry(parent, struct zram_entry, rb_node); + if (checksum < entry->checksum) + rb_node = &parent->rb_left; + else if (checksum > entry->checksum) + rb_node = &parent->rb_right; + else + rb_node = &parent->rb_left; + } + + rb_link_node(&new->rb_node, parent, rb_node); + rb_insert_color(&new->rb_node, rb_root); + spin_unlock(&hash->lock); +} + +static bool zram_dedup_match(struct zram *zram, struct zram_entry *entry, + unsigned char *mem) +{ + bool match = false; + unsigned char *cmem; + struct zcomp_strm *zstrm; + + cmem = zs_map_object(zram->mem_pool, entry->handle, ZS_MM_RO); + if (entry->len == PAGE_SIZE) { + match = !memcmp(mem, cmem, PAGE_SIZE); + } else { + zstrm = zcomp_stream_get(zram->comp); + if (!zcomp_decompress(zstrm, cmem, entry->len, zstrm->buffer)) + match = !memcmp(mem, zstrm->buffer, PAGE_SIZE); + zcomp_stream_put(zram->comp); + } + zs_unmap_object(zram->mem_pool, entry->handle); + + return match; +} + +static unsigned long zram_dedup_put(struct zram *zram, + struct zram_entry *entry) +{ + struct 
zram_hash *hash; + u32 checksum; + + checksum = entry->checksum; + hash = &zram->hash[checksum % zram->hash_size]; + + spin_lock(&hash->lock); + + entry->refcount--; + if (!entry->refcount) + rb_erase(&entry->rb_node, &hash->rb_root); + else + atomic64_sub(entry->len, &zram->stats.dup_data_size); + + spin_unlock(&hash->lock); + + return entry->refcount; +} + +static struct zram_entry *zram_dedup_get(struct zram *zram, + unsigned char *mem, u32 checksum) +{ + struct zram_hash *hash; + struct zram_entry *entry; + struct rb_node *rb_node; + + hash = &zram->hash[checksum % zram->hash_size]; + + spin_lock(&hash->lock); + rb_node = hash->rb_root.rb_node; + while (rb_node) { + entry = rb_entry(rb_node, struct zram_entry, rb_node); + if (checksum == entry->checksum) { + entry->refcount++; + atomic64_add(entry->len, &zram->stats.dup_data_size); + spin_unlock(&hash->lock); + + if (zram_dedup_match(zram, entry, mem)) + return entry; + + zram_entry_free(zram, entry); + + return NULL; + } + + if (checksum < entry->checksum) + rb_node = rb_node->rb_left; + else + rb_node = rb_node->rb_right; + } + spin_unlock(&hash->lock); + + return NULL; +} + +struct zram_entry *zram_dedup_find(struct zram *zram, struct page *page, + u32 *checksum) +{ + void *mem; + struct zram_entry *entry; + + mem = kmap_atomic(page); + *checksum = zram_dedup_checksum(mem); + + entry = zram_dedup_get(zram, mem, *checksum); + kunmap_atomic(mem); + + return entry; +} + +void zram_dedup_init_entry(struct zram *zram, struct zram_entry *entry, + unsigned long handle, unsigned int len) +{ + entry->handle = handle; + entry->refcount = 1; + entry->len = len; +} + +bool zram_dedup_put_entry(struct zram *zram, struct zram_entry *entry) +{ + if (zram_dedup_put(zram, entry)) + return false; + + return true; +} + +int zram_dedup_init(struct zram *zram, size_t num_pages) +{ + int i; + struct zram_hash *hash; + + zram->hash_size = num_pages >> ZRAM_HASH_SHIFT; + zram->hash_size = min_t(size_t, ZRAM_HASH_SIZE_MAX, 
zram->hash_size); + zram->hash_size = max_t(size_t, ZRAM_HASH_SIZE_MIN, zram->hash_size); + zram->hash = vzalloc(zram->hash_size * sizeof(struct zram_hash)); + if (!zram->hash) { + pr_err("Error allocating zram entry hash\n"); + return -ENOMEM; + } + + for (i = 0; i < zram->hash_size; i++) { + hash = &zram->hash[i]; + spin_lock_init(&hash->lock); + hash->rb_root = RB_ROOT; + } + + return 0; +} + +void zram_dedup_fini(struct zram *zram) +{ + vfree(zram->hash); + zram->hash = NULL; + zram->hash_size = 0; +} diff --git a/drivers/block/zram/zram_dedup.h b/drivers/block/zram/zram_dedup.h new file mode 100644 index 000000000000..ebe6bff6c0da --- /dev/null +++ b/drivers/block/zram/zram_dedup.h @@ -0,0 +1,22 @@ +#ifndef _ZRAM_DEDUP_H_ +#define _ZRAM_DEDUP_H_ + +struct zram; +struct zram_entry; + +u64 zram_dedup_dup_size(struct zram *zram); +u64 zram_dedup_meta_size(struct zram *zram); + +void zram_dedup_insert(struct zram *zram, struct zram_entry *new, + u32 checksum); +struct zram_entry *zram_dedup_find(struct zram *zram, struct page *page, + u32 *checksum); + +void zram_dedup_init_entry(struct zram *zram, struct zram_entry *entry, + unsigned long handle, unsigned int len); +bool zram_dedup_put_entry(struct zram *zram, struct zram_entry *entry); + +int zram_dedup_init(struct zram *zram, size_t num_pages); +void zram_dedup_fini(struct zram *zram); + +#endif /* _ZRAM_DEDUP_H_ */ diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 5ba7a3d4154c..dd12167a3985 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1078,7 +1078,7 @@ static ssize_t mm_stat_show(struct device *dev, max_used = atomic_long_read(&zram->stats.max_used_pages); ret = scnprintf(buf, PAGE_SIZE, - "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n", + "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu %8llu %8llu\n", orig_size << PAGE_SHIFT, (u64)atomic64_read(&zram->stats.compr_data_size), mem_used << PAGE_SHIFT, @@ -1087,7 +1087,9 @@ static 
ssize_t mm_stat_show(struct device *dev, (u64)atomic64_read(&zram->stats.same_pages), pool_stats.pages_compacted, (u64)atomic64_read(&zram->stats.huge_pages), - (u64)atomic64_read(&zram->stats.huge_pages_since)); + (u64)atomic64_read(&zram->stats.huge_pages_since), + zram_dedup_dup_size(zram), + zram_dedup_meta_size(zram)); up_read(&zram->init_lock); return ret; @@ -1142,26 +1144,34 @@ static struct zram_entry *zram_entry_alloc(struct zram *zram, unsigned int len, gfp_t flags) { struct zram_entry *entry; + unsigned long handle; entry = kzalloc(sizeof(*entry), flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA)); if (!entry) return NULL; - entry->handle = zs_malloc(zram->mem_pool, len, flags); - if (!entry->handle) { + handle = zs_malloc(zram->mem_pool, len, flags); + if (!handle) { kfree(entry); return NULL; } + zram_dedup_init_entry(zram, entry, handle, len); + atomic64_add(sizeof(*entry), &zram->stats.meta_data_size); + return entry; } -static inline void zram_entry_free(struct zram *zram, - struct zram_entry *entry) +void zram_entry_free(struct zram *zram, struct zram_entry *entry) { + if (!zram_dedup_put_entry(zram, entry)) + return; + zs_free(zram->mem_pool, entry->handle); kfree(entry); + + atomic64_sub(sizeof(*entry), &zram->stats.meta_data_size); } static void zram_meta_free(struct zram *zram, u64 disksize) @@ -1174,6 +1184,7 @@ static void zram_meta_free(struct zram *zram, u64 disksize) zram_free_page(zram, index); zs_destroy_pool(zram->mem_pool); + zram_dedup_fini(zram); vfree(zram->table); } @@ -1194,6 +1205,13 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) if (!huge_class_size) huge_class_size = zs_huge_class_size(zram->mem_pool); + + if (zram_dedup_init(zram, num_pages)) { + vfree(zram->table); + zs_destroy_pool(zram->mem_pool); + return false; + } + return true; } @@ -1355,6 +1373,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, void *src, *dst, *mem; struct zcomp_strm *zstrm; struct page *page = bvec->bv_page; 
+ u32 checksum; unsigned long element = 0; enum zram_pageflags flags = 0; @@ -1368,6 +1387,12 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, } kunmap_atomic(mem); + entry = zram_dedup_find(zram, page, &checksum); + if (entry) { + comp_len = entry->len; + goto out; + } + compress_again: zstrm = zcomp_stream_get(zram->comp); src = kmap_atomic(page); @@ -1435,6 +1460,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, zcomp_stream_put(zram->comp); zs_unmap_object(zram->mem_pool, entry->handle); atomic64_add(comp_len, &zram->stats.compr_data_size); + zram_dedup_insert(zram, entry, checksum); out: /* * Free memory associated with this sector diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index c089eaa204ef..1652b9dca1e7 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -18,8 +18,10 @@ #include #include #include +#include #include "zcomp.h" +#include "zram_dedup.h" #define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) #define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) @@ -57,6 +59,10 @@ enum zram_pageflags { /*-- Data structures */ struct zram_entry { + struct rb_node rb_node; + u32 len; + u32 checksum; + unsigned long refcount; unsigned long handle; }; @@ -92,6 +98,16 @@ struct zram_stats { atomic64_t bd_reads; /* no. of reads from backing device */ atomic64_t bd_writes; /* no. 
of writes from backing device */ #endif + atomic64_t dup_data_size; /* + * compressed size of pages + * duplicated + */ + atomic64_t meta_data_size; /* size of zram_entries */ +}; + +struct zram_hash { + spinlock_t lock; + struct rb_root rb_root; }; struct zram { @@ -99,6 +115,8 @@ struct zram { struct zs_pool *mem_pool; struct zcomp *comp; struct gendisk *disk; + struct zram_hash *hash; + size_t hash_size; /* Prevent concurrent execution of device init */ struct rw_semaphore init_lock; /* @@ -130,4 +148,6 @@ struct zram { struct dentry *debugfs_dir; #endif }; + +void zram_entry_free(struct zram *zram, struct zram_entry *entry); #endif From f9fc6f313b9d03482d2b7aefbf9e815cb39ab313 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Fri, 12 May 2017 11:30:03 +0900 Subject: [PATCH 22/44] zram: make deduplication feature optional Benefit of deduplication is dependent on the workload so it's not preferable to always enable. Therefore, make it optional in Kconfig and device param. Default is 'off'. This option will be beneficial for users who use the zram as blockdev and stores build output to it. 
Change-Id: If282bb8aa15c5749859a87cf36db7eb9edb3b1ed Reviewed-by: Sergey Senozhatsky Acked-by: Minchan Kim Signed-off-by: Joonsoo Kim Link: https://lore.kernel.org/patchwork/patch/787164/ Patch-mainline: linux-kernel@ Thu, 11 May 2017 22:30:52 Signed-off-by: Charan Teja Reddy Signed-off-by: Park Ju Hyung Signed-off-by: UtsavBalar1231 --- drivers/block/zram/Kconfig | 14 ++++++ drivers/block/zram/Makefile | 3 +- drivers/block/zram/zram_dedup.c | 15 ++++++ drivers/block/zram/zram_dedup.h | 23 ++++++++++ drivers/block/zram/zram_drv.c | 81 ++++++++++++++++++++++++++++----- drivers/block/zram/zram_drv.h | 10 ++++ 6 files changed, 134 insertions(+), 12 deletions(-) diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index 4a5c255ca6e4..bdccf98efebe 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -54,6 +54,20 @@ config ZRAM_DEF_COMP default "lz4hc" if ZRAM_DEF_COMP_LZ4HC default "842" if ZRAM_DEF_COMP_842 +config ZRAM_DEDUP + bool "Deduplication support for ZRAM data" + depends on ZRAM + default n + help + Deduplicate ZRAM data to reduce amount of memory consumption. + Advantage largely depends on the workload. In some cases, this + option reduces memory usage to the half. However, if there is no + duplicated data, the amount of memory consumption would be + increased due to additional metadata usage. And, there is + computation time trade-off. Please check the benefit before + enabling this option. Experiment shows the positive effect when + the zram is used as blockdev and is used to store build output. 
+ config ZRAM_WRITEBACK bool "Write back incompressible or idle page to backing device" depends on ZRAM diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile index 72cc66ffb5e8..f8ebdea5c113 100644 --- a/drivers/block/zram/Makefile +++ b/drivers/block/zram/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -zram-y := zcomp.o zram_drv.o zram_dedup.o +zram-y := zcomp.o zram_drv.o +zram-$(CONFIG_ZRAM_DEDUP) += zram_dedup.o obj-$(CONFIG_ZRAM) += zram.o diff --git a/drivers/block/zram/zram_dedup.c b/drivers/block/zram/zram_dedup.c index a8427f75b6ea..560b1f5da430 100644 --- a/drivers/block/zram/zram_dedup.c +++ b/drivers/block/zram/zram_dedup.c @@ -41,6 +41,9 @@ void zram_dedup_insert(struct zram *zram, struct zram_entry *new, struct rb_node **rb_node, *parent = NULL; struct zram_entry *entry; + if (!zram_dedup_enabled(zram)) + return; + new->checksum = checksum; hash = &zram->hash[checksum % zram->hash_size]; rb_root = &hash->rb_root; @@ -148,6 +151,9 @@ struct zram_entry *zram_dedup_find(struct zram *zram, struct page *page, void *mem; struct zram_entry *entry; + if (!zram_dedup_enabled(zram)) + return NULL; + mem = kmap_atomic(page); *checksum = zram_dedup_checksum(mem); @@ -160,6 +166,9 @@ struct zram_entry *zram_dedup_find(struct zram *zram, struct page *page, void zram_dedup_init_entry(struct zram *zram, struct zram_entry *entry, unsigned long handle, unsigned int len) { + if (!zram_dedup_enabled(zram)) + return; + entry->handle = handle; entry->refcount = 1; entry->len = len; @@ -167,6 +176,9 @@ void zram_dedup_init_entry(struct zram *zram, struct zram_entry *entry, bool zram_dedup_put_entry(struct zram *zram, struct zram_entry *entry) { + if (!zram_dedup_enabled(zram)) + return true; + if (zram_dedup_put(zram, entry)) return false; @@ -178,6 +190,9 @@ int zram_dedup_init(struct zram *zram, size_t num_pages) int i; struct zram_hash *hash; + if (!zram_dedup_enabled(zram)) + return 0; + zram->hash_size = num_pages >> ZRAM_HASH_SHIFT; 
zram->hash_size = min_t(size_t, ZRAM_HASH_SIZE_MAX, zram->hash_size); zram->hash_size = max_t(size_t, ZRAM_HASH_SIZE_MIN, zram->hash_size); diff --git a/drivers/block/zram/zram_dedup.h b/drivers/block/zram/zram_dedup.h index ebe6bff6c0da..8ab267b0b956 100644 --- a/drivers/block/zram/zram_dedup.h +++ b/drivers/block/zram/zram_dedup.h @@ -4,6 +4,8 @@ struct zram; struct zram_entry; +#ifdef CONFIG_ZRAM_DEDUP + u64 zram_dedup_dup_size(struct zram *zram); u64 zram_dedup_meta_size(struct zram *zram); @@ -18,5 +20,26 @@ bool zram_dedup_put_entry(struct zram *zram, struct zram_entry *entry); int zram_dedup_init(struct zram *zram, size_t num_pages); void zram_dedup_fini(struct zram *zram); +#else + +static inline u64 zram_dedup_dup_size(struct zram *zram) { return 0; } +static inline u64 zram_dedup_meta_size(struct zram *zram) { return 0; } + +static inline void zram_dedup_insert(struct zram *zram, struct zram_entry *new, + u32 checksum) { } +static inline struct zram_entry *zram_dedup_find(struct zram *zram, + struct page *page, u32 *checksum) { return NULL; } + +static inline void zram_dedup_init_entry(struct zram *zram, + struct zram_entry *entry, unsigned long handle, + unsigned int len) { } +static inline bool zram_dedup_put_entry(struct zram *zram, + struct zram_entry *entry) { return true; } + +static inline int zram_dedup_init(struct zram *zram, + size_t num_pages) { return 0; } +static inline void zram_dedup_fini(struct zram *zram) { } + +#endif #endif /* _ZRAM_DEDUP_H_ */ diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index dd12167a3985..145132eb56b7 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1022,6 +1022,41 @@ static ssize_t comp_algorithm_store(struct device *dev, return len; } +static ssize_t use_dedup_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + bool val; + struct zram *zram = dev_to_zram(dev); + + down_read(&zram->init_lock); + val = zram->use_dedup; + 
up_read(&zram->init_lock); + + return scnprintf(buf, PAGE_SIZE, "%d\n", (int)val); +} + +#ifdef CONFIG_ZRAM_DEDUP +static ssize_t use_dedup_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + int val; + struct zram *zram = dev_to_zram(dev); + + if (kstrtoint(buf, 10, &val) || (val != 0 && val != 1)) + return -EINVAL; + + down_write(&zram->init_lock); + if (init_done(zram)) { + up_write(&zram->init_lock); + pr_info("Can't change dedup usage for initialized device\n"); + return -EBUSY; + } + zram->use_dedup = val; + up_write(&zram->init_lock); + return len; +} +#endif + static ssize_t compact_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -1140,20 +1175,32 @@ static DEVICE_ATTR_RO(bd_stat); #endif static DEVICE_ATTR_RO(debug_stat); +static unsigned long zram_entry_handle(struct zram *zram, + struct zram_entry *entry) +{ + if (zram_dedup_enabled(zram)) + return entry->handle; + else + return (unsigned long)entry; +} + static struct zram_entry *zram_entry_alloc(struct zram *zram, unsigned int len, gfp_t flags) { struct zram_entry *entry; unsigned long handle; - entry = kzalloc(sizeof(*entry), - flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA)); - if (!entry) + handle = zs_malloc(zram->mem_pool, len, flags); + if (!handle) return NULL; - handle = zs_malloc(zram->mem_pool, len, flags); - if (!handle) { - kfree(entry); + if (!zram_dedup_enabled(zram)) + return (struct zram_entry *)handle; + + entry = kzalloc(sizeof(*entry), + flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA)); + if (!entry) { + zs_free(zram->mem_pool, handle); return NULL; } @@ -1168,7 +1215,11 @@ void zram_entry_free(struct zram *zram, struct zram_entry *entry) if (!zram_dedup_put_entry(zram, entry)) return; - zs_free(zram->mem_pool, entry->handle); + zs_free(zram->mem_pool, zram_entry_handle(zram, entry)); + + if (!zram_dedup_enabled(zram)) + return; + kfree(entry); atomic64_sub(sizeof(*entry), 
&zram->stats.meta_data_size); @@ -1308,7 +1359,8 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, if (size != PAGE_SIZE) zstrm = zcomp_stream_get(zram->comp); - src = zs_map_object(zram->mem_pool, entry->handle, ZS_MM_RO); + src = zs_map_object(zram->mem_pool, + zram_entry_handle(zram, entry), ZS_MM_RO); if (size == PAGE_SIZE) { dst = kmap_atomic(page); memcpy(dst, src, PAGE_SIZE); @@ -1320,7 +1372,7 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, kunmap_atomic(dst); zcomp_stream_put(zram->comp); } - zs_unmap_object(zram->mem_pool, entry->handle); + zs_unmap_object(zram->mem_pool, zram_entry_handle(zram, entry)); zram_slot_unlock(zram, index); /* Should NEVER happen. Return bio error if it does. */ @@ -1448,7 +1500,8 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, return -ENOMEM; } - dst = zs_map_object(zram->mem_pool, entry->handle, ZS_MM_WO); + dst = zs_map_object(zram->mem_pool, + zram_entry_handle(zram, entry), ZS_MM_WO); src = zstrm->buffer; if (comp_len == PAGE_SIZE) @@ -1458,7 +1511,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, kunmap_atomic(src); zcomp_stream_put(zram->comp); - zs_unmap_object(zram->mem_pool, entry->handle); + zs_unmap_object(zram->mem_pool, zram_entry_handle(zram, entry)); atomic64_add(comp_len, &zram->stats.compr_data_size); zram_dedup_insert(zram, entry, checksum); out: @@ -1903,6 +1956,11 @@ static DEVICE_ATTR_WO(writeback); static DEVICE_ATTR_RW(writeback_limit); static DEVICE_ATTR_RW(writeback_limit_enable); #endif +#ifdef CONFIG_ZRAM_DEDUP +static DEVICE_ATTR_RW(use_dedup); +#else +static DEVICE_ATTR_RO(use_dedup); +#endif static struct attribute *zram_disk_attrs[] = { &dev_attr_disksize.attr, @@ -1920,6 +1978,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_writeback_limit.attr, &dev_attr_writeback_limit_enable.attr, #endif + &dev_attr_use_dedup.attr, &dev_attr_io_stat.attr, &dev_attr_mm_stat.attr, #ifdef 
CONFIG_ZRAM_WRITEBACK diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 1652b9dca1e7..b2ca54dada64 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -135,6 +135,7 @@ struct zram { * zram is claimed so open request will be failed */ bool claim; /* Protected by bdev->bd_mutex */ + bool use_dedup; #ifdef CONFIG_ZRAM_WRITEBACK struct file *backing_dev; spinlock_t wb_limit_lock; @@ -149,5 +150,14 @@ struct zram { #endif }; +static inline bool zram_dedup_enabled(struct zram *zram) +{ +#ifdef CONFIG_ZRAM_DEDUP + return zram->use_dedup; +#else + return false; +#endif +} + void zram_entry_free(struct zram *zram, struct zram_entry *entry); #endif From 957f82a73f4c8eda34280e32e62020284339c2ab Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Fri, 12 May 2017 11:30:04 +0900 Subject: [PATCH 23/44] zram: compare all the entries with same checksum for deduplication Until now, we compare just one entry with same checksum when checking duplication since it is the simplest way to implement. However, for the completeness, checking all the entries is better so this patch implement to compare all the entries with same checksum. Since this event would be rare so there would be no performance loss. 
Change-Id: Ie7d61c14d127a28f5a06d85b0ca66b9fada20cbb Reviewed-by: Sergey Senozhatsky Acked-by: Minchan Kim Signed-off-by: Joonsoo Kim Link: https://lore.kernel.org/patchwork/patch/787163/ Patch-mainline: linux-kernel@ Thu, 11 May 2017 22:30:29 Signed-off-by: Charan Teja Reddy Signed-off-by: UtsavBalar1231 --- drivers/block/zram/zram_dedup.c | 59 ++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 12 deletions(-) diff --git a/drivers/block/zram/zram_dedup.c b/drivers/block/zram/zram_dedup.c index 560b1f5da430..14c4988f8ff7 100644 --- a/drivers/block/zram/zram_dedup.c +++ b/drivers/block/zram/zram_dedup.c @@ -109,6 +109,51 @@ static unsigned long zram_dedup_put(struct zram *zram, return entry->refcount; } +static struct zram_entry *__zram_dedup_get(struct zram *zram, + struct zram_hash *hash, unsigned char *mem, + struct zram_entry *entry) +{ + struct zram_entry *tmp, *prev = NULL; + struct rb_node *rb_node; + + /* find left-most entry with same checksum */ + while ((rb_node = rb_prev(&entry->rb_node))) { + tmp = rb_entry(rb_node, struct zram_entry, rb_node); + if (tmp->checksum != entry->checksum) + break; + + entry = tmp; + } + +again: + entry->refcount++; + atomic64_add(entry->len, &zram->stats.dup_data_size); + spin_unlock(&hash->lock); + + if (prev) + zram_entry_free(zram, prev); + + if (zram_dedup_match(zram, entry, mem)) + return entry; + + spin_lock(&hash->lock); + tmp = NULL; + rb_node = rb_next(&entry->rb_node); + if (rb_node) + tmp = rb_entry(rb_node, struct zram_entry, rb_node); + + if (tmp && (tmp->checksum == entry->checksum)) { + prev = entry; + entry = tmp; + goto again; + } + + spin_unlock(&hash->lock); + zram_entry_free(zram, entry); + + return NULL; +} + static struct zram_entry *zram_dedup_get(struct zram *zram, unsigned char *mem, u32 checksum) { @@ -122,18 +167,8 @@ static struct zram_entry *zram_dedup_get(struct zram *zram, rb_node = hash->rb_root.rb_node; while (rb_node) { entry = rb_entry(rb_node, struct zram_entry, rb_node); 
- if (checksum == entry->checksum) { - entry->refcount++; - atomic64_add(entry->len, &zram->stats.dup_data_size); - spin_unlock(&hash->lock); - - if (zram_dedup_match(zram, entry, mem)) - return entry; - - zram_entry_free(zram, entry); - - return NULL; - } + if (checksum == entry->checksum) + return __zram_dedup_get(zram, hash, mem, entry); if (checksum < entry->checksum) rb_node = rb_node->rb_left; From 8ece784e4e74ee3e0fa121ae8964da44f5a0a240 Mon Sep 17 00:00:00 2001 From: Charan Teja Reddy Date: Tue, 16 Jul 2019 16:54:31 +0530 Subject: [PATCH 24/44] zram: fix race condition while returning zram_entry refcount With deduplication enabled, the duplicated zram objects are tracked using the zram_entry backed by a refcount. The race condition while decrementing the refcount through zram_dedup_put() is as follows: Say Task A and task B share the same object and thus the zram_entry->refcount = 2. Task A Task B zram_dedup_put zram_dedup_put spin_lock(&hash->lock); entry->refcount--; (Now it is 1) spin_unlock(&hash->lock); spin_lock(&hash->lock); entry->refcount--; (Now it is 0) spin_unlock(&hash->lock); return entry->refcount return entry->refcount We return 0 in above steps thus leading to double free of the handle, which is a slab object. 
Change-Id: I8dd9bad27140a6e3a295905bf4411050d8eac931 Signed-off-by: Charan Teja Reddy Signed-off-by: UtsavBalar1231 --- drivers/block/zram/zram_dedup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_dedup.c b/drivers/block/zram/zram_dedup.c index 14c4988f8ff7..e441289fff81 100644 --- a/drivers/block/zram/zram_dedup.c +++ b/drivers/block/zram/zram_dedup.c @@ -92,13 +92,14 @@ static unsigned long zram_dedup_put(struct zram *zram, { struct zram_hash *hash; u32 checksum; + unsigned long val; checksum = entry->checksum; hash = &zram->hash[checksum % zram->hash_size]; spin_lock(&hash->lock); - entry->refcount--; + val = --entry->refcount; if (!entry->refcount) rb_erase(&entry->rb_node, &hash->rb_root); else @@ -106,7 +107,7 @@ static unsigned long zram_dedup_put(struct zram *zram, spin_unlock(&hash->lock); - return entry->refcount; + return val; } static struct zram_entry *__zram_dedup_get(struct zram *zram, From 41e368dcf1f19f06aad08ca4747e9a158eb85892 Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Sun, 4 Apr 2021 02:33:50 +0900 Subject: [PATCH 25/44] zram: use xxhash instead of jhash in dedup Signed-off-by: Park Ju Hyung Change-Id: I960b4c6198ca711adab778737d90ff5ec8a09c3f Signed-off-by: UtsavBalar1231 --- drivers/block/zram/Kconfig | 1 + drivers/block/zram/zram_dedup.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index bdccf98efebe..4923185dd988 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -57,6 +57,7 @@ config ZRAM_DEF_COMP config ZRAM_DEDUP bool "Deduplication support for ZRAM data" depends on ZRAM + select XXHASH default n help Deduplicate ZRAM data to reduce amount of memory consumption. 
diff --git a/drivers/block/zram/zram_dedup.c b/drivers/block/zram/zram_dedup.c index e441289fff81..c7e6b890352f 100644 --- a/drivers/block/zram/zram_dedup.c +++ b/drivers/block/zram/zram_dedup.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include "zram_drv.h" @@ -30,7 +30,7 @@ u64 zram_dedup_meta_size(struct zram *zram) static u32 zram_dedup_checksum(unsigned char *mem) { - return jhash(mem, PAGE_SIZE, 0); + return xxhash(mem, PAGE_SIZE, 0); } void zram_dedup_insert(struct zram *zram, struct zram_entry *new, From d31e8dcc7c057fcf996874ab155f718688612ef5 Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Sun, 4 Apr 2021 16:47:36 +0900 Subject: [PATCH 26/44] zram: enable dedup by default Signed-off-by: Park Ju Hyung Change-Id: I04cc2a0685661ca04bed542a634b19fcb50cd3f4 Signed-off-by: UtsavBalar1231 --- drivers/block/zram/zram_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 145132eb56b7..74fe5e3ecebf 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2019,6 +2019,9 @@ static int zram_add(void) init_rwsem(&zram->init_lock); #ifdef CONFIG_ZRAM_WRITEBACK spin_lock_init(&zram->wb_limit_lock); +#endif +#ifdef CONFIG_ZRAM_DEDUP + zram->use_dedup = true; #endif queue = blk_alloc_queue(GFP_KERNEL); if (!queue) { From b5a6bcd7fbb89bdd455c1488c333ad271bae7865 Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Sun, 4 Apr 2021 16:57:12 +0900 Subject: [PATCH 27/44] zram: show deduped status in debugfs Signed-off-by: Park Ju Hyung Change-Id: If349e68ff0b53c4b25dc53f170a6ea70be35f610 Signed-off-by: UtsavBalar1231 --- drivers/block/zram/zram_drv.c | 18 ++++++++++++++---- drivers/block/zram/zram_drv.h | 1 + 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 74fe5e3ecebf..a1eba43325ed 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -901,13 +901,14 @@ 
static ssize_t read_block_state(struct file *file, char __user *buf, ts = ktime_to_timespec64(zram->table[index].ac_time); copied = snprintf(kbuf + written, count, - "%12zd %12lld.%06lu %c%c%c%c\n", + "%12zd %12lld.%06lu %c%c%c%c%c\n", index, (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC, zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.', zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.', zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', - zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.'); + zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.', + zram_test_flag(zram, index, ZRAM_DEDUPED) ? 'd' : '.'); if (count < copied) { zram_slot_unlock(zram, index); @@ -1281,6 +1282,9 @@ static void zram_free_page(struct zram *zram, size_t index) if (zram_test_flag(zram, index, ZRAM_IDLE)) zram_clear_flag(zram, index, ZRAM_IDLE); + if (zram_test_flag(zram, index, ZRAM_DEDUPED)) + zram_clear_flag(zram, index, ZRAM_DEDUPED); + if (zram_test_flag(zram, index, ZRAM_HUGE)) { zram_clear_flag(zram, index, ZRAM_HUGE); atomic64_dec(&zram->stats.huge_pages); @@ -1441,6 +1445,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, entry = zram_dedup_find(zram, page, &checksum); if (entry) { + flags = ZRAM_DEDUPED; comp_len = entry->len; goto out; } @@ -1528,10 +1533,15 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, atomic64_inc(&zram->stats.huge_pages_since); } - if (flags) { + switch (flags) { + case ZRAM_SAME: zram_set_flag(zram, index, flags); zram_set_element(zram, index, element); - } else { + break; + case ZRAM_DEDUPED: + zram_set_flag(zram, index, flags); + // Fallthrough + default: zram_set_entry(zram, index, entry); zram_set_obj_size(zram, index, comp_len); } diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index b2ca54dada64..f21a5492cf35 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -52,6 +52,7 @@ enum zram_pageflags { ZRAM_UNDER_WB, /* page is under writeback */ ZRAM_HUGE, 
/* Incompressible page */ ZRAM_IDLE, /* not accessed page since last idle marking */ + ZRAM_DEDUPED, /* Deduplicated with existing entry */ __NR_ZRAM_PAGEFLAGS, }; From 181fd647897bc333202a4826436c7c495912cc69 Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Sun, 4 Apr 2021 17:03:07 +0900 Subject: [PATCH 28/44] zram: remove incompressible page handling Incompressible doesn't mean it'll be unused in the near future. Rely on existing LRU eviction method to write those to the backing device. Signed-off-by: Park Ju Hyung Change-Id: Ic956c4bb0b098c80a1eaf6afe72367baba744df6 Signed-off-by: UtsavBalar1231 --- drivers/block/zram/zram_drv.c | 28 ++++------------------------ drivers/block/zram/zram_drv.h | 3 --- 2 files changed, 4 insertions(+), 27 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a1eba43325ed..0f589a136c72 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -612,8 +612,7 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, #define PAGE_WB_SIG "page_index=" #define PAGE_WRITEBACK 0 -#define HUGE_WRITEBACK 1 -#define IDLE_WRITEBACK 2 +#define IDLE_WRITEBACK 1 static ssize_t writeback_store(struct device *dev, @@ -631,8 +630,6 @@ static ssize_t writeback_store(struct device *dev, if (sysfs_streq(buf, "idle")) mode = IDLE_WRITEBACK; - else if (sysfs_streq(buf, "huge")) - mode = HUGE_WRITEBACK; else { if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1)) return -EINVAL; @@ -697,9 +694,6 @@ static ssize_t writeback_store(struct device *dev, if (mode == IDLE_WRITEBACK && !zram_test_flag(zram, index, ZRAM_IDLE)) goto next; - if (mode == HUGE_WRITEBACK && - !zram_test_flag(zram, index, ZRAM_HUGE)) - goto next; /* * Clearing ZRAM_UNDER_WB is duty of caller. * IOW, zram_free_page never clear it. 
@@ -901,12 +895,11 @@ static ssize_t read_block_state(struct file *file, char __user *buf, ts = ktime_to_timespec64(zram->table[index].ac_time); copied = snprintf(kbuf + written, count, - "%12zd %12lld.%06lu %c%c%c%c%c\n", + "%12zd %12lld.%06lu %c%c%c%c\n", index, (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC, zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.', zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.', - zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.', zram_test_flag(zram, index, ZRAM_DEDUPED) ? 'd' : '.'); @@ -1114,16 +1107,14 @@ static ssize_t mm_stat_show(struct device *dev, max_used = atomic_long_read(&zram->stats.max_used_pages); ret = scnprintf(buf, PAGE_SIZE, - "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu %8llu %8llu\n", + "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n", orig_size << PAGE_SHIFT, (u64)atomic64_read(&zram->stats.compr_data_size), mem_used << PAGE_SHIFT, zram->limit_pages << PAGE_SHIFT, max_used << PAGE_SHIFT, (u64)atomic64_read(&zram->stats.same_pages), - pool_stats.pages_compacted, - (u64)atomic64_read(&zram->stats.huge_pages), - (u64)atomic64_read(&zram->stats.huge_pages_since), + atomic_long_read(&pool_stats.pages_compacted), zram_dedup_dup_size(zram), zram_dedup_meta_size(zram)); up_read(&zram->init_lock); @@ -1285,11 +1276,6 @@ static void zram_free_page(struct zram *zram, size_t index) if (zram_test_flag(zram, index, ZRAM_DEDUPED)) zram_clear_flag(zram, index, ZRAM_DEDUPED); - if (zram_test_flag(zram, index, ZRAM_HUGE)) { - zram_clear_flag(zram, index, ZRAM_HUGE); - atomic64_dec(&zram->stats.huge_pages); - } - if (zram_test_flag(zram, index, ZRAM_WB)) { zram_clear_flag(zram, index, ZRAM_WB); free_block_bdev(zram, zram_get_element(zram, index)); @@ -1527,12 +1513,6 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, zram_slot_lock(zram, index); zram_free_page(zram, index); - if (comp_len == PAGE_SIZE) { - zram_set_flag(zram, index, ZRAM_HUGE); - 
atomic64_inc(&zram->stats.huge_pages); - atomic64_inc(&zram->stats.huge_pages_since); - } - switch (flags) { case ZRAM_SAME: zram_set_flag(zram, index, flags); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index f21a5492cf35..859c4974a427 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -50,7 +50,6 @@ enum zram_pageflags { ZRAM_SAME, /* Page consists the same element */ ZRAM_WB, /* page is stored on backing_device */ ZRAM_UNDER_WB, /* page is under writeback */ - ZRAM_HUGE, /* Incompressible page */ ZRAM_IDLE, /* not accessed page since last idle marking */ ZRAM_DEDUPED, /* Deduplicated with existing entry */ @@ -88,8 +87,6 @@ struct zram_stats { atomic64_t invalid_io; /* non-page-aligned I/O requests */ atomic64_t notify_free; /* no. of swap slot free notifications */ atomic64_t same_pages; /* no. of same element filled pages */ - atomic64_t huge_pages; /* no. of huge pages */ - atomic64_t huge_pages_since; /* no. of huge pages since zram set up */ atomic64_t pages_stored; /* no. of pages currently stored */ atomic_long_t max_used_pages; /* no. of maximum pages stored */ atomic64_t writestall; /* no. 
of write slow paths */ From 2d60e68431f39ffbd074ef425f30b00e1cee8c80 Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Mon, 5 Apr 2021 00:45:57 +0900 Subject: [PATCH 29/44] zram: remove writeback_limit function This will be unused/superseded with upcoming commits Signed-off-by: Park Ju Hyung Change-Id: Iecdcb442713aa4ba670849ed5c5430037a045238 Signed-off-by: UtsavBalar1231 --- drivers/block/zram/zram_drv.c | 89 ----------------------------------- drivers/block/zram/zram_drv.h | 3 -- 2 files changed, 92 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 0f589a136c72..2a56a32a8081 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -322,76 +322,6 @@ static ssize_t idle_store(struct device *dev, } #ifdef CONFIG_ZRAM_WRITEBACK -static ssize_t writeback_limit_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - struct zram *zram = dev_to_zram(dev); - u64 val; - ssize_t ret = -EINVAL; - - if (kstrtoull(buf, 10, &val)) - return ret; - - down_read(&zram->init_lock); - spin_lock(&zram->wb_limit_lock); - zram->wb_limit_enable = val; - spin_unlock(&zram->wb_limit_lock); - up_read(&zram->init_lock); - ret = len; - - return ret; -} - -static ssize_t writeback_limit_enable_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - bool val; - struct zram *zram = dev_to_zram(dev); - - down_read(&zram->init_lock); - spin_lock(&zram->wb_limit_lock); - val = zram->wb_limit_enable; - spin_unlock(&zram->wb_limit_lock); - up_read(&zram->init_lock); - - return scnprintf(buf, PAGE_SIZE, "%d\n", val); -} - -static ssize_t writeback_limit_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - struct zram *zram = dev_to_zram(dev); - u64 val; - ssize_t ret = -EINVAL; - - if (kstrtoull(buf, 10, &val)) - return ret; - - down_read(&zram->init_lock); - spin_lock(&zram->wb_limit_lock); - zram->bd_wb_limit = val; - 
spin_unlock(&zram->wb_limit_lock); - up_read(&zram->init_lock); - ret = len; - - return ret; -} - -static ssize_t writeback_limit_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - u64 val; - struct zram *zram = dev_to_zram(dev); - - down_read(&zram->init_lock); - spin_lock(&zram->wb_limit_lock); - val = zram->bd_wb_limit; - spin_unlock(&zram->wb_limit_lock); - up_read(&zram->init_lock); - - return scnprintf(buf, PAGE_SIZE, "%llu\n", val); -} - static void reset_bdev(struct zram *zram) { struct block_device *bdev; @@ -666,14 +596,6 @@ static ssize_t writeback_store(struct device *dev, bvec.bv_len = PAGE_SIZE; bvec.bv_offset = 0; - spin_lock(&zram->wb_limit_lock); - if (zram->wb_limit_enable && !zram->bd_wb_limit) { - spin_unlock(&zram->wb_limit_lock); - ret = -EIO; - break; - } - spin_unlock(&zram->wb_limit_lock); - if (!blk_idx) { blk_idx = alloc_block_bdev(zram); if (!blk_idx) { @@ -759,10 +681,6 @@ static ssize_t writeback_store(struct device *dev, zram_set_element(zram, index, blk_idx); blk_idx = 0; atomic64_inc(&zram->stats.pages_stored); - spin_lock(&zram->wb_limit_lock); - if (zram->wb_limit_enable && zram->bd_wb_limit > 0) - zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12); - spin_unlock(&zram->wb_limit_lock); next: zram_slot_unlock(zram, index); } @@ -1943,8 +1861,6 @@ static DEVICE_ATTR_RW(comp_algorithm); #ifdef CONFIG_ZRAM_WRITEBACK static DEVICE_ATTR_RW(backing_dev); static DEVICE_ATTR_WO(writeback); -static DEVICE_ATTR_RW(writeback_limit); -static DEVICE_ATTR_RW(writeback_limit_enable); #endif #ifdef CONFIG_ZRAM_DEDUP static DEVICE_ATTR_RW(use_dedup); @@ -1965,8 +1881,6 @@ static struct attribute *zram_disk_attrs[] = { #ifdef CONFIG_ZRAM_WRITEBACK &dev_attr_backing_dev.attr, &dev_attr_writeback.attr, - &dev_attr_writeback_limit.attr, - &dev_attr_writeback_limit_enable.attr, #endif &dev_attr_use_dedup.attr, &dev_attr_io_stat.attr, @@ -2007,9 +1921,6 @@ static int zram_add(void) device_id = ret; init_rwsem(&zram->init_lock); -#ifdef 
CONFIG_ZRAM_WRITEBACK - spin_lock_init(&zram->wb_limit_lock); -#endif #ifdef CONFIG_ZRAM_DEDUP zram->use_dedup = true; #endif diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 859c4974a427..8aa20e6526f9 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -136,9 +136,6 @@ struct zram { bool use_dedup; #ifdef CONFIG_ZRAM_WRITEBACK struct file *backing_dev; - spinlock_t wb_limit_lock; - bool wb_limit_enable; - u64 bd_wb_limit; struct block_device *bdev; unsigned long *bitmap; unsigned long nr_pages; From 73de8fda159a31fdd55947691e7b3f3be96def3b Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Sun, 4 Apr 2021 23:51:19 +0900 Subject: [PATCH 30/44] zram: trim the backing device upon registration We potentially suffer from insufficient free blocks from the SSD, causing slowdowns from high GC overheads as we never trim the backing device. This is not an issue in practice because the userdata partition will be trimmed well. Still, let's do a whole partition discard upon registration for minor optimization. It takes less than 60ms to trim a 4GB partition in modern asynchronous trim designs used in UFS FTL. 
Signed-off-by: Park Ju Hyung Change-Id: Ib54b5142831cc8c397ff7ba5bc94c23058d80f98 Signed-off-by: UtsavBalar1231 --- drivers/block/zram/zram_drv.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 2a56a32a8081..531005f6a98c 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -380,7 +380,7 @@ static ssize_t backing_dev_store(struct device *dev, struct inode *inode; struct address_space *mapping; unsigned int bitmap_sz; - unsigned long nr_pages, *bitmap = NULL; + unsigned long nr_pages, nr_size, *bitmap = NULL; struct block_device *bdev = NULL; int err; struct zram *zram = dev_to_zram(dev); @@ -426,7 +426,8 @@ static ssize_t backing_dev_store(struct device *dev, goto out; } - nr_pages = i_size_read(inode) >> PAGE_SHIFT; + nr_size = i_size_read(inode); + nr_pages = nr_size >> PAGE_SHIFT; bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long); bitmap = kvzalloc(bitmap_sz, GFP_KERNEL); if (!bitmap) { @@ -434,6 +435,12 @@ static ssize_t backing_dev_store(struct device *dev, goto out; } + // Trim the device + pr_info("discarding backing device\n"); + err = blkdev_issue_discard(bdev, 0, nr_size >> 9, GFP_KERNEL, 0); + if (err) + pr_warn("failed to discard device: %d\n", err); + reset_bdev(zram); zram->bdev = bdev; From 20172e0aa65a5570b5eaf2117ff0c058c0a0f6fd Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Mon, 5 Apr 2021 00:56:47 +0900 Subject: [PATCH 31/44] zram: do not writeback deduped pages Those are not worth it to writeback Signed-off-by: Park Ju Hyung Change-Id: I588b012cade5a193ca198695d37696d394933c1f Signed-off-by: UtsavBalar1231 --- drivers/block/zram/zram_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 531005f6a98c..94228b0f2dc7 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -617,6 +617,7 @@ static ssize_t writeback_store(struct 
device *dev, if (zram_test_flag(zram, index, ZRAM_WB) || zram_test_flag(zram, index, ZRAM_SAME) || + zram_test_flag(zram, index, ZRAM_DEDUPED) || zram_test_flag(zram, index, ZRAM_UNDER_WB)) goto next; From 171442a051a04a8259f47bfd838bfffb047eebb6 Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Mon, 5 Apr 2021 00:50:16 +0900 Subject: [PATCH 32/44] zram: defer bvec assignment If loops are continued, bvec assignment at that point is just increasing overhead. Signed-off-by: Park Ju Hyung Change-Id: Ic1009a92923088653ffa32968f84f30d12e50919 Signed-off-by: UtsavBalar1231 --- drivers/block/zram/zram_drv.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 94228b0f2dc7..7ebc3f9d081e 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -599,10 +599,6 @@ static ssize_t writeback_store(struct device *dev, for (; nr_pages != 0; index++, nr_pages--) { struct bio_vec bvec; - bvec.bv_page = page; - bvec.bv_len = PAGE_SIZE; - bvec.bv_offset = 0; - if (!blk_idx) { blk_idx = alloc_block_bdev(zram); if (!blk_idx) { @@ -632,6 +628,11 @@ static ssize_t writeback_store(struct device *dev, /* Need for hugepage writeback racing */ zram_set_flag(zram, index, ZRAM_IDLE); zram_slot_unlock(zram, index); + + bvec.bv_page = page; + bvec.bv_len = PAGE_SIZE; + bvec.bv_offset = 0; + if (zram_bvec_read(zram, &bvec, index, 0, NULL)) { zram_slot_lock(zram, index); zram_clear_flag(zram, index, ZRAM_UNDER_WB); From 594870e1dfdbcb3a7a4b932aefa3739009be21a4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:36 +0200 Subject: [PATCH 33/44] bdi: remove BDI_CAP_SYNCHRONOUS_IO BDI_CAP_SYNCHRONOUS_IO is only checked in the swap code, and used to decided if ->rw_page can be used on a block device. Just check up for the method instead. 
The only complication is that zram needs a second set of block_device_operations as it can switch between modes that actually support ->rw_page and those who don't. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe Signed-off-by: Juhyung Park Change-Id: I76402462142f15e2d90cc57d346b17ded34a97f6 Signed-off-by: UtsavBalar1231 --- drivers/block/brd.c | 1 - drivers/block/zram/zram_drv.c | 18 ++++++++++++------ include/linux/backing-dev.h | 8 -------- mm/swapfile.c | 2 +- 4 files changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 02e8fff3f828..66a5bb0d9613 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -394,7 +394,6 @@ static struct brd_device *brd_alloc(int i) disk->flags = GENHD_FL_EXT_DEVT; sprintf(disk->disk_name, "ram%d", i); set_capacity(disk, rd_size * 2); - brd->brd_queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; /* Tell the block layer that this is not a rotational device */ blk_queue_flag_set(QUEUE_FLAG_NONROT, brd->brd_queue); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 7ebc3f9d081e..672719721827 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -51,6 +51,9 @@ static unsigned int num_devices = 1; */ static size_t huge_class_size; +static const struct block_device_operations zram_devops; +static const struct block_device_operations zram_wb_devops; + static void zram_free_page(struct zram *zram, size_t index); static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio); @@ -335,8 +338,7 @@ static void reset_bdev(struct zram *zram) filp_close(zram->backing_dev, NULL); zram->backing_dev = NULL; zram->bdev = NULL; - zram->disk->queue->backing_dev_info->capabilities |= - BDI_CAP_SYNCHRONOUS_IO; + zram->disk->fops = &zram_devops; kvfree(zram->bitmap); zram->bitmap = NULL; } @@ -457,8 +459,7 @@ static 
ssize_t backing_dev_store(struct device *dev, * freely but in fact, IO is going on so finally could cause * use-after-free when the IO is really done. */ - zram->disk->queue->backing_dev_info->capabilities &= - ~BDI_CAP_SYNCHRONOUS_IO; + zram->disk->fops = &zram_wb_devops; up_write(&zram->init_lock); pr_info("setup backing device %s\n", file_name); @@ -1858,6 +1859,12 @@ static const struct block_device_operations zram_devops = { .owner = THIS_MODULE }; +static const struct block_device_operations zram_wb_devops = { + .open = zram_open, + .swap_slot_free_notify = zram_slot_free_notify, + .owner = THIS_MODULE +}; + static DEVICE_ATTR_WO(compact); static DEVICE_ATTR_RW(disksize); static DEVICE_ATTR_RO(initstate); @@ -1991,8 +1998,7 @@ static int zram_add(void) if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); - zram->disk->queue->backing_dev_info->capabilities |= - (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO); + zram->disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; device_add_disk(NULL, zram->disk, zram_disk_attr_groups); strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 6f968073b898..2d99e1b0aa66 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -124,8 +124,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold. * * BDI_CAP_CGROUP_WRITEBACK: Supports cgroup-aware writeback. - * BDI_CAP_SYNCHRONOUS_IO: Device is so fast that asynchronous IO would be - * inefficient. 
*/ #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 #define BDI_CAP_NO_WRITEBACK 0x00000002 @@ -133,7 +131,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); #define BDI_CAP_STABLE_WRITES 0x00000008 #define BDI_CAP_STRICTLIMIT 0x00000010 #define BDI_CAP_CGROUP_WRITEBACK 0x00000020 -#define BDI_CAP_SYNCHRONOUS_IO 0x00000040 #define BDI_CAP_NO_ACCT_AND_WRITEBACK \ (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB) @@ -179,11 +176,6 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) long congestion_wait(int sync, long timeout); long wait_iff_congested(int sync, long timeout); -static inline bool bdi_cap_synchronous_io(struct backing_dev_info *bdi) -{ - return bdi->capabilities & BDI_CAP_SYNCHRONOUS_IO; -} - static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi) { return bdi->capabilities & BDI_CAP_STABLE_WRITES; diff --git a/mm/swapfile.c b/mm/swapfile.c index 2b6d6b2e40a1..07e9fd237041 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3211,7 +3211,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) if (bdi_cap_stable_pages_required(inode_to_bdi(inode))) p->flags |= SWP_STABLE_WRITES; - if (bdi_cap_synchronous_io(inode_to_bdi(inode))) + if (p->bdev && p->bdev->bd_disk->fops->rw_page) p->flags |= SWP_SYNCHRONOUS_IO; if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) { From 94537927ba07c136a7a8fdaf44f14ef7d6b7627b Mon Sep 17 00:00:00 2001 From: Juhyung Park Date: Sun, 3 Oct 2021 17:38:24 +0900 Subject: [PATCH 34/44] zram: switch to 64-bit hash for dedup The original dedup code does not handle collision from the observation that it practically does not happen. For additional peace of mind, use a bigger hash size for reducing the possibility of collision even further. 
Signed-off-by: Juhyung Park Change-Id: I8c8f5b623032018aeebb8953cc29328046eae9bc Signed-off-by: UtsavBalar1231 --- drivers/block/zram/zram_dedup.c | 12 ++++++------ drivers/block/zram/zram_dedup.h | 8 ++++---- drivers/block/zram/zram_drv.c | 2 +- drivers/block/zram/zram_drv.h | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/block/zram/zram_dedup.c b/drivers/block/zram/zram_dedup.c index c7e6b890352f..22f2312e18c4 100644 --- a/drivers/block/zram/zram_dedup.c +++ b/drivers/block/zram/zram_dedup.c @@ -28,13 +28,13 @@ u64 zram_dedup_meta_size(struct zram *zram) return (u64)atomic64_read(&zram->stats.meta_data_size); } -static u32 zram_dedup_checksum(unsigned char *mem) +static u64 zram_dedup_checksum(unsigned char *mem) { - return xxhash(mem, PAGE_SIZE, 0); + return xxh64(mem, PAGE_SIZE, 0); } void zram_dedup_insert(struct zram *zram, struct zram_entry *new, - u32 checksum) + u64 checksum) { struct zram_hash *hash; struct rb_root *rb_root; @@ -91,7 +91,7 @@ static unsigned long zram_dedup_put(struct zram *zram, struct zram_entry *entry) { struct zram_hash *hash; - u32 checksum; + u64 checksum; unsigned long val; checksum = entry->checksum; @@ -156,7 +156,7 @@ static struct zram_entry *__zram_dedup_get(struct zram *zram, } static struct zram_entry *zram_dedup_get(struct zram *zram, - unsigned char *mem, u32 checksum) + unsigned char *mem, u64 checksum) { struct zram_hash *hash; struct zram_entry *entry; @@ -182,7 +182,7 @@ static struct zram_entry *zram_dedup_get(struct zram *zram, } struct zram_entry *zram_dedup_find(struct zram *zram, struct page *page, - u32 *checksum) + u64 *checksum) { void *mem; struct zram_entry *entry; diff --git a/drivers/block/zram/zram_dedup.h b/drivers/block/zram/zram_dedup.h index 8ab267b0b956..d8ee79654604 100644 --- a/drivers/block/zram/zram_dedup.h +++ b/drivers/block/zram/zram_dedup.h @@ -10,9 +10,9 @@ u64 zram_dedup_dup_size(struct zram *zram); u64 zram_dedup_meta_size(struct zram *zram); void 
zram_dedup_insert(struct zram *zram, struct zram_entry *new, - u32 checksum); + u64 checksum); struct zram_entry *zram_dedup_find(struct zram *zram, struct page *page, - u32 *checksum); + u64 *checksum); void zram_dedup_init_entry(struct zram *zram, struct zram_entry *entry, unsigned long handle, unsigned int len); @@ -26,9 +26,9 @@ static inline u64 zram_dedup_dup_size(struct zram *zram) { return 0; } static inline u64 zram_dedup_meta_size(struct zram *zram) { return 0; } static inline void zram_dedup_insert(struct zram *zram, struct zram_entry *new, - u32 checksum) { } + u64 checksum) { } static inline struct zram_entry *zram_dedup_find(struct zram *zram, - struct page *page, u32 *checksum) { return NULL; } + struct page *page, u64 *checksum) { return NULL; } static inline void zram_dedup_init_entry(struct zram *zram, struct zram_entry *entry, unsigned long handle, diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 672719721827..cd162e6eda1d 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1343,7 +1343,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, void *src, *dst, *mem; struct zcomp_strm *zstrm; struct page *page = bvec->bv_page; - u32 checksum; + u64 checksum; unsigned long element = 0; enum zram_pageflags flags = 0; diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 8aa20e6526f9..7b4f1122165b 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -61,7 +61,7 @@ enum zram_pageflags { struct zram_entry { struct rb_node rb_node; u32 len; - u32 checksum; + u64 checksum; unsigned long refcount; unsigned long handle; }; From c176cdd5f6d9c4bd5bfe7fddd33e54382edcd3e7 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 25 Oct 2021 10:54:23 +0800 Subject: [PATCH 35/44] zram: fix race between zram_reset_device() and disksize_store() When the ->init_lock is released in zram_reset_device(), disksize_store() can come in and try 
to allocate meta, but zram_reset_device() is freeing the meta, which causes races. Link: https://lore.kernel.org/linux-block/20210927163805.808907-1-mcgrof@kernel.org/T/#mc617f865a3fa2778e40f317ddf48f6447c20c073 Reported-by: Luis Chamberlain Reviewed-by: Luis Chamberlain Signed-off-by: Ming Lei Acked-by: Minchan Kim Link: https://lore.kernel.org/r/20211025025426.2815424-2-ming.lei@redhat.com Signed-off-by: Jens Axboe Change-Id: I9eee5c009355a935723835bbecf866e6fb7f6a97 Signed-off-by: UtsavBalar1231 --- drivers/block/zram/zram_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index cd162e6eda1d..4dc852fbf4e0 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1735,12 +1735,13 @@ static void zram_reset_device(struct zram *zram) set_capacity(zram->disk, 0); part_stat_set_all(&zram->disk->part0, 0); - up_write(&zram->init_lock); /* I/O operation under all of CPU are done so let's free */ zram_meta_free(zram, disksize); memset(&zram->stats, 0, sizeof(zram->stats)); zcomp_destroy(comp); reset_bdev(zram); + + up_write(&zram->init_lock); } static ssize_t disksize_store(struct device *dev, From fbc35c350afb0a26b8d96b7f1e8cc0a67b76c387 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 25 Oct 2021 10:54:26 +0800 Subject: [PATCH 36/44] zram: replace fsync_bdev with sync_blockdev When calling fsync_bdev(), the zram driver guarantees that the bdev won't be opened by anyone, so there can't be an active fs/superblock over the zram bdev; replace fsync_bdev with sync_blockdev.
Reviewed-by: Luis Chamberlain Signed-off-by: Ming Lei Acked-by: Minchan Kim Link: https://lore.kernel.org/r/20211025025426.2815424-5-ming.lei@redhat.com Signed-off-by: Jens Axboe Change-Id: Ib0826c26b805e332bfa1ee72bb5873fa2dc9caca Signed-off-by: UtsavBalar1231 --- drivers/block/zram/zram_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 4dc852fbf4e0..06d38e2c5bd9 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1826,7 +1826,7 @@ static ssize_t reset_store(struct device *dev, mutex_unlock(&bdev->bd_mutex); /* Make sure all the pending I/O are finished */ - fsync_bdev(bdev); + sync_blockdev(bdev); zram_reset_device(zram); revalidate_disk(zram->disk); bdput(bdev); @@ -2038,7 +2038,7 @@ static int zram_remove(struct zram *zram) zram_debugfs_unregister(zram); /* Make sure all the pending I/O are finished */ - fsync_bdev(bdev); + sync_blockdev(bdev); zram_reset_device(zram); bdput(bdev); From 2e01393e9e55778d0b37002f3583b17231ddabb8 Mon Sep 17 00:00:00 2001 From: Jaewon Kim Date: Fri, 5 Nov 2021 13:45:09 -0700 Subject: [PATCH 37/44] zram_drv: allow reclaim on bio_alloc The read_from_bdev_async is not called on atomic context. So GFP_NOIO is available rather than GFP_ATOMIC. If there were reclaimable pages with GFP_NOIO, we can avoid allocation failure and page fault failure. 
Link: https://lkml.kernel.org/r/20210908005241.28062-1-jaewon31.kim@samsung.com Signed-off-by: Jaewon Kim Reported-by: Yong-Taek Lee Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Change-Id: I10deb56bbf3c1f27938aa8d093ced5e3d3dcb4ed Signed-off-by: UtsavBalar1231 --- drivers/block/zram/zram_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 06d38e2c5bd9..98af62f830f6 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -524,7 +524,7 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, { struct bio *bio; - bio = bio_alloc(GFP_ATOMIC, 1); + bio = bio_alloc(GFP_NOIO, 1); if (!bio) return -ENOMEM; From 3fe266113a09315d30c7de60e36c8f3674248061 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 Nov 2021 13:45:12 -0700 Subject: [PATCH 38/44] zram: off by one in read_block_state() [ Upstream commit a88e03cf3d190cf46bc4063a9b7efe87590de5f4 ] snprintf() returns the number of bytes it would have printed if there were space. But it does not count the NUL terminator. So that means that if "count == copied" then this has already overflowed by one character. This bug likely isn't super harmful in real life. Link: https://lkml.kernel.org/r/20210916130404.GA25094@kili Fixes: c0265342bff4 ("zram: introduce zram memory tracking") Signed-off-by: Dan Carpenter Cc: Minchan Kim Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: UtsavBalar1231 --- drivers/block/zram/zram_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 98af62f830f6..7ceae41bfbe9 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -831,7 +831,7 @@ static ssize_t read_block_state(struct file *file, char __user *buf, zram_test_flag(zram, index, ZRAM_IDLE) ? 
'i' : '.', zram_test_flag(zram, index, ZRAM_DEDUPED) ? 'd' : '.'); - if (count < copied) { + if (count <= copied) { zram_slot_unlock(zram, index); break; } From c9b8e69c040814f1bb38b207e3b8d025e14d2f98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=98=BF=E8=8F=8C=E2=80=A2=E6=9C=AA=E9=9C=9C?= <799620521@qq.com> Date: Sun, 20 Mar 2022 21:26:09 +0800 Subject: [PATCH 39/44] zram: Do not subtract deduplicated data from compressed size when freeing pages In the function __zram_bvec_write, when we match that the checksum of the current data is already in the list, we will skip the compression operation and allocate the zram entry directly, and will not calculate the size of the duplicate data in the storage page compression size . In the function zram_free_page, the deduplicated data size is directly subtracted from the stored page compression size indiscriminately, which will inevitably lead to statistical errors, and compr_data_size will continue to decrease, so that it will be less than 0 after a period of time. The data is written into the structure of u64, causing the data to exceed the value. Therefore, we add a flag to record which pages have been deduplicated, and no longer subtract the size of these pages from compr_data_size when freeing pages. 
Change-Id: Idedf4b7ddd5e2f6afb5dde8a2f8c55c4b1aec481 Signed-off-by: UtsavBalar1231 --- drivers/block/zram/zram_drv.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 7ceae41bfbe9..ed85c8e7c1a4 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1194,6 +1194,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) static void zram_free_page(struct zram *zram, size_t index) { struct zram_entry *entry; + bool is_deduped; #ifdef CONFIG_ZRAM_MEMORY_TRACKING zram->table[index].ac_time = 0; @@ -1201,7 +1202,8 @@ static void zram_free_page(struct zram *zram, size_t index) if (zram_test_flag(zram, index, ZRAM_IDLE)) zram_clear_flag(zram, index, ZRAM_IDLE); - if (zram_test_flag(zram, index, ZRAM_DEDUPED)) + is_deduped = zram_test_flag(zram, index, ZRAM_DEDUPED); + if (is_deduped) zram_clear_flag(zram, index, ZRAM_DEDUPED); if (zram_test_flag(zram, index, ZRAM_WB)) { @@ -1226,8 +1228,9 @@ static void zram_free_page(struct zram *zram, size_t index) zram_entry_free(zram, entry); - atomic64_sub(zram_get_obj_size(zram, index), - &zram->stats.compr_data_size); + if (!is_deduped) + atomic64_sub(zram_get_obj_size(zram, index), + &zram->stats.compr_data_size); out: atomic64_dec(&zram->stats.pages_stored); zram_set_entry(zram, index, NULL); From 0a7f7d4c913bb3191a15204b54f96d8622a9438f Mon Sep 17 00:00:00 2001 From: LibXZR Date: Sat, 24 Apr 2021 23:18:01 +0800 Subject: [PATCH 40/44] drivers: zram_drv: Expose zram_compact() for zram0 * Automatic compaction needs this Signed-off-by: LibXZR Change-Id: If3ec2e7be7974934fcd009fe96aaa00660f5c060 Signed-off-by: UtsavBalar1231 --- drivers/block/zram/zram_drv.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index ed85c8e7c1a4..4962976100a6 100644 --- a/drivers/block/zram/zram_drv.c +++ 
b/drivers/block/zram/zram_drv.c @@ -51,6 +51,8 @@ static unsigned int num_devices = 1; */ static size_t huge_class_size; +static struct zram *zram0; + static const struct block_device_operations zram_devops; static const struct block_device_operations zram_wb_devops; @@ -979,20 +981,25 @@ static ssize_t use_dedup_store(struct device *dev, } #endif -static ssize_t compact_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) +void zram_compact(void) { - struct zram *zram = dev_to_zram(dev); + if (!zram0) + return; - down_read(&zram->init_lock); - if (!init_done(zram)) { - up_read(&zram->init_lock); - return -EINVAL; + down_read(&zram0->init_lock); + if (!init_done(zram0)) { + up_read(&zram0->init_lock); + return; } - zs_compact(zram->mem_pool); - up_read(&zram->init_lock); + zs_compact(zram0->mem_pool); + up_read(&zram0->init_lock); +} +static ssize_t compact_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + zram_compact(); return len; } @@ -2008,6 +2015,7 @@ static int zram_add(void) strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); zram_debugfs_register(zram); + zram0 = zram; pr_info("Added device: %s\n", zram->disk->disk_name); return device_id; @@ -2050,6 +2058,7 @@ static int zram_remove(struct zram *zram) del_gendisk(zram->disk); blk_cleanup_queue(zram->disk->queue); put_disk(zram->disk); + zram0 = NULL; kfree(zram); return 0; } From 73d591b38312287662e8cc6038d5e9b557610485 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 23 Feb 2021 17:28:55 +0800 Subject: [PATCH 41/44] block: genhd: add 'groups' argument to device_add_disk commit fef912bf860e8e7e48a2bfb978a356bba743a8b7 upstream. Update device_add_disk() to take an 'groups' argument so that individual drivers can register a device with additional sysfs attributes. This avoids race condition the driver would otherwise have if these groups were to be created with sysfs_add_groups(). 
Signed-off-by: Martin Wilck Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe Signed-off-by: Jeffle Xu Signed-off-by: Greg Kroah-Hartman Signed-off-by: UtsavBalar1231 --- arch/um/drivers/ubd_kern.c | 2 +- block/genhd.c | 19 ++++++++++++++----- drivers/block/floppy.c | 2 +- drivers/block/mtip32xx/mtip32xx.c | 2 +- drivers/block/ps3disk.c | 2 +- drivers/block/ps3vram.c | 2 +- drivers/block/rsxx/dev.c | 2 +- drivers/block/skd_main.c | 2 +- drivers/block/sunvdc.c | 2 +- drivers/block/virtio_blk.c | 2 +- drivers/block/vs_block_client.c | 2 +- drivers/block/xen-blkfront.c | 2 +- drivers/ide/ide-cd.c | 2 +- drivers/ide/ide-gd.c | 2 +- drivers/memstick/core/ms_block.c | 2 +- drivers/memstick/core/mspro_block.c | 2 +- drivers/mmc/core/block.c | 2 +- drivers/mtd/mtd_blkdevs.c | 2 +- drivers/nvdimm/blk.c | 2 +- drivers/nvdimm/btt.c | 2 +- drivers/nvdimm/pmem.c | 2 +- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/multipath.c | 2 +- drivers/s390/block/dasd_genhd.c | 2 +- drivers/s390/block/dcssblk.c | 2 +- drivers/s390/block/scm_blk.c | 2 +- drivers/scsi/sd.c | 2 +- drivers/scsi/sr.c | 2 +- include/linux/genhd.h | 5 +++-- 29 files changed, 44 insertions(+), 34 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 748bd0921dff..788c80abff5d 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -891,7 +891,7 @@ static int ubd_disk_register(int major, u64 size, int unit, disk->private_data = &ubd_devs[unit]; disk->queue = ubd_devs[unit].queue; - device_add_disk(parent, disk); + device_add_disk(parent, disk, NULL); *disk_out = disk; return 0; diff --git a/block/genhd.c b/block/genhd.c index 6965dde96373..aee2fa9de1a7 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -582,7 +582,8 @@ static int exact_lock(dev_t devt, void *data) return 0; } -static void register_disk(struct device *parent, struct gendisk *disk) +static void register_disk(struct device 
*parent, struct gendisk *disk, + const struct attribute_group **groups) { struct device *ddev = disk_to_dev(disk); struct block_device *bdev; @@ -597,6 +598,10 @@ static void register_disk(struct device *parent, struct gendisk *disk) /* delay uevents, until we scanned partition table */ dev_set_uevent_suppress(ddev, 1); + if (groups) { + WARN_ON(ddev->groups); + ddev->groups = groups; + } if (device_add(ddev)) return; if (!sysfs_deprecated) { @@ -664,6 +669,7 @@ static void register_disk(struct device *parent, struct gendisk *disk) * __device_add_disk - add disk information to kernel list * @parent: parent device for the disk * @disk: per-device partitioning information + * @groups: Additional per-device sysfs groups * @register_queue: register the queue if set to true * * This function registers the partitioning information in @disk @@ -672,6 +678,7 @@ static void register_disk(struct device *parent, struct gendisk *disk) * FIXME: error handling */ static void __device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups, bool register_queue) { dev_t devt; @@ -715,7 +722,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk, blk_register_region(disk_devt(disk), disk->minors, NULL, exact_match, exact_lock, disk); } - register_disk(parent, disk); + register_disk(parent, disk, groups); if (register_queue) blk_register_queue(disk); @@ -729,15 +736,17 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk, blk_integrity_add(disk); } -void device_add_disk(struct device *parent, struct gendisk *disk) +void device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups) + { - __device_add_disk(parent, disk, true); + __device_add_disk(parent, disk, groups, true); } EXPORT_SYMBOL(device_add_disk); void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk) { - __device_add_disk(parent, disk, false); + __device_add_disk(parent, 
disk, NULL, false); } EXPORT_SYMBOL(device_add_disk_no_queue_reg); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index bf222c4b2f82..8f444b375761 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4713,7 +4713,7 @@ static int __init do_floppy_init(void) /* to be cleaned up... */ disks[drive]->private_data = (void *)(long)drive; disks[drive]->flags |= GENHD_FL_REMOVABLE; - device_add_disk(&floppy_device[drive].dev, disks[drive]); + device_add_disk(&floppy_device[drive].dev, disks[drive], NULL); } return 0; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index d0666f5ce003..1d7d48d8a205 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3861,7 +3861,7 @@ static int mtip_block_initialize(struct driver_data *dd) set_capacity(dd->disk, capacity); /* Enable the block device and add it to /dev */ - device_add_disk(&dd->pdev->dev, dd->disk); + device_add_disk(&dd->pdev->dev, dd->disk, NULL); dd->bdev = bdget_disk(dd->disk, 0); /* diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index bd1c66f5631a..42bff6b1d6a8 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -499,7 +499,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) gendisk->disk_name, priv->model, priv->raw_capacity >> 11, get_capacity(gendisk) >> 11); - device_add_disk(&dev->sbd.core, gendisk); + device_add_disk(&dev->sbd.core, gendisk, NULL); return 0; fail_cleanup_queue: diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 1e3d5de9d838..c0c50816a10b 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -769,7 +769,7 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) dev_info(&dev->core, "%s: Using %lu MiB of GPU memory\n", gendisk->disk_name, get_capacity(gendisk) >> 11); - device_add_disk(&dev->core, gendisk); + device_add_disk(&dev->core, gendisk, NULL); return 0; fail_cleanup_queue: diff --git 
a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index 1a92f9e65937..3894aa0f350b 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -226,7 +226,7 @@ int rsxx_attach_dev(struct rsxx_cardinfo *card) set_capacity(card->gendisk, card->size8 >> 9); else set_capacity(card->gendisk, 0); - device_add_disk(CARD_TO_DEV(card), card->gendisk); + device_add_disk(CARD_TO_DEV(card), card->gendisk, NULL); card->bdev_attached = 1; } diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 27323fa23997..80a5806ede03 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -3104,7 +3104,7 @@ static int skd_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo) static int skd_bdev_attach(struct device *parent, struct skd_device *skdev) { dev_dbg(&skdev->pdev->dev, "add_disk\n"); - device_add_disk(parent, skdev->disk); + device_add_disk(parent, skdev->disk, NULL); return 0; } diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 5d7024057540..6b7b0d8a2acb 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -862,7 +862,7 @@ static int probe_disk(struct vdc_port *port) port->vdisk_size, (port->vdisk_size >> (20 - 9)), port->vio.ver.major, port->vio.ver.minor); - device_add_disk(&port->vio.vdev->dev, g); + device_add_disk(&port->vio.vdev->dev, g, NULL); return 0; } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 075523777a4a..e03e13a3f32c 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -858,7 +858,7 @@ static int virtblk_probe(struct virtio_device *vdev) virtblk_update_capacity(vblk, false); virtio_device_ready(vdev); - device_add_disk(&vdev->dev, vblk->disk); + device_add_disk(&vdev->dev, vblk->disk, NULL); err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial); if (err) goto out_del_disk; diff --git a/drivers/block/vs_block_client.c b/drivers/block/vs_block_client.c index 4e3ba351cf37..18cec2e92575 100644 --- 
a/drivers/block/vs_block_client.c +++ b/drivers/block/vs_block_client.c @@ -550,7 +550,7 @@ static int vs_block_client_disk_add(struct block_client *client) client->blkdev = blkdev; vs_service_state_unlock(client->service); - device_add_disk(&client->service->dev, blkdev->disk); + device_add_disk(&client->service->dev, blkdev->disk, NULL); dev_dbg(&client->service->dev, "added block disk '%s'\n", blkdev->disk->disk_name); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index d4ceee3825f8..1b06c8e46ffa 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2422,7 +2422,7 @@ static void blkfront_connect(struct blkfront_info *info) for (i = 0; i < info->nr_rings; i++) kick_pending_request_queues(&info->rinfo[i]); - device_add_disk(&info->xbdev->dev, info->gd); + device_add_disk(&info->xbdev->dev, info->gd, NULL); info->is_ready = 1; return; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 44a7a255ef74..f9b59d41813f 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1784,7 +1784,7 @@ static int ide_cd_probe(ide_drive_t *drive) ide_cd_read_toc(drive); g->fops = &idecd_ops; g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; - device_add_disk(&drive->gendev, g); + device_add_disk(&drive->gendev, g, NULL); return 0; out_free_disk: diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index e823394ed543..04e008e8f6f9 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -416,7 +416,7 @@ static int ide_gd_probe(ide_drive_t *drive) if (drive->dev_flags & IDE_DFLAG_REMOVABLE) g->flags = GENHD_FL_REMOVABLE; g->fops = &ide_gd_ops; - device_add_disk(&drive->gendev, g); + device_add_disk(&drive->gendev, g, NULL); return 0; out_free_disk: diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index 716fc8ed31d3..8a02f11076f9 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -2146,7 +2146,7 @@ static int 
msb_init_disk(struct memstick_dev *card) set_disk_ro(msb->disk, 1); msb_start(card); - device_add_disk(&card->dev, msb->disk); + device_add_disk(&card->dev, msb->disk, NULL); dbg("Disk added"); return 0; diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 5ee932631fae..0cd30dcb6801 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -1236,7 +1236,7 @@ static int mspro_block_init_disk(struct memstick_dev *card) set_capacity(msb->disk, capacity); dev_dbg(&card->dev, "capacity set %ld\n", capacity); - device_add_disk(&card->dev, msb->disk); + device_add_disk(&card->dev, msb->disk, NULL); msb->active = 1; return 0; diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index bcb83a1ca9e9..141ae5767b27 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2750,7 +2750,7 @@ static int mmc_add_disk(struct mmc_blk_data *md) int ret; struct mmc_card *card = md->queue.card; - device_add_disk(md->parent, md->disk); + device_add_disk(md->parent, md->disk, NULL); md->force_ro.show = force_ro_show; md->force_ro.store = force_ro_store; sysfs_attr_init(&md->force_ro.attr); diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 29c0bfd74e8a..6a41dfa3c36b 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -447,7 +447,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) if (new->readonly) set_disk_ro(gd, 1); - device_add_disk(&new->mtd->dev, gd); + device_add_disk(&new->mtd->dev, gd, NULL); if (new->disk_attributes) { ret = sysfs_create_group(&disk_to_dev(gd)->kobj, diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 62e9cb167aad..db45c6bbb7bb 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -290,7 +290,7 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) } set_capacity(disk, available_disk_size >> SECTOR_SHIFT); - device_add_disk(dev, disk); + device_add_disk(dev, disk, NULL); 
revalidate_disk(disk); return 0; } diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 853edc649ed4..b6823e66af13 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1565,7 +1565,7 @@ static int btt_blk_init(struct btt *btt) } } set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); - device_add_disk(&btt->nd_btt->dev, btt->btt_disk); + device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL); btt->nd_btt->size = btt->nlba * (u64)btt->sector_size; revalidate_disk(btt->btt_disk); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 18e10421e7d7..504fb60c5818 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -482,7 +482,7 @@ static int pmem_attach_disk(struct device *dev, gendev = disk_to_dev(disk); gendev->groups = pmem_attribute_groups; - device_add_disk(dev, disk); + device_add_disk(dev, disk, NULL); if (devm_add_action_or_reset(dev, pmem_release_disk, pmem)) return -ENOMEM; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ca63c7efbf3f..696ea38057d4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3211,7 +3211,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) nvme_get_ctrl(ctrl); - device_add_disk(ctrl->device, ns->disk); + device_add_disk(ctrl->device, ns->disk, NULL); if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj, &nvme_ns_id_attr_group)) pr_warn("%s: failed to create sysfs group for identification\n", diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index e71075338ff5..ee9e092ace4a 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -314,7 +314,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) return; if (!(head->disk->flags & GENHD_FL_UP)) { - device_add_disk(&head->subsys->dev, head->disk); + device_add_disk(&head->subsys->dev, head->disk, NULL); if (sysfs_create_group(&disk_to_dev(head->disk)->kobj, &nvme_ns_id_attr_group)) dev_warn(&head->subsys->dev, diff --git 
a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 7036a6c6f86f..5542d9eadfe0 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -76,7 +76,7 @@ int dasd_gendisk_alloc(struct dasd_block *block) gdp->queue = block->request_queue; block->gdp = gdp; set_capacity(block->gdp, 0); - device_add_disk(&base->cdev->dev, block->gdp); + device_add_disk(&base->cdev->dev, block->gdp, NULL); return 0; } diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 23e526cda5c1..4e8aedd50cb0 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -685,7 +685,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char } get_device(&dev_info->dev); - device_add_disk(&dev_info->dev, dev_info->gd); + device_add_disk(&dev_info->dev, dev_info->gd, NULL); switch (dev_info->segment_type) { case SEG_TYPE_SR: diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 98f66b7b6794..e01889394c84 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -500,7 +500,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) /* 512 byte sectors */ set_capacity(bdev->gendisk, scmdev->size >> 9); - device_add_disk(&scmdev->dev, bdev->gendisk); + device_add_disk(&scmdev->dev, bdev->gendisk, NULL); return 0; out_queue: diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 5dd379e5bf2b..03699361eecf 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3262,7 +3262,7 @@ static void sd_probe_async(void *data, async_cookie_t cookie) if (sdp->autosuspend_delay >= 0) pm_runtime_set_autosuspend_delay(dev, sdp->autosuspend_delay); - device_add_disk(dev, gd); + device_add_disk(dev, gd, NULL); if (sdkp->capacity) sd_dif_config_host(sdkp); diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 5be3d6b7991b..45c8bf39ad23 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -758,7 +758,7 @@ static int 
sr_probe(struct device *dev) dev_set_drvdata(dev, cd); disk->flags |= GENHD_FL_REMOVABLE; - device_add_disk(&sdev->sdev_gendev, disk); + device_add_disk(&sdev->sdev_gendev, disk, NULL); sdev_printk(KERN_DEBUG, sdev, "Attached scsi CD-ROM %s\n", cd->cdi.name); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 48770a0704cc..15994ce4c9a1 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -417,10 +417,11 @@ static inline void free_part_info(struct hd_struct *part) extern void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part); /* block/genhd.c */ -extern void device_add_disk(struct device *parent, struct gendisk *disk); +extern void device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups); static inline void add_disk(struct gendisk *disk) { - device_add_disk(NULL, disk); + device_add_disk(NULL, disk, NULL); } extern void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk); static inline void add_disk_no_queue_reg(struct gendisk *disk) From 20b025027e1aa04584e55a71efc6957bfd029c19 Mon Sep 17 00:00:00 2001 From: UtsavBalar1231 Date: Mon, 18 Apr 2022 05:02:43 +0000 Subject: [PATCH 42/44] zram: Enable zRAM deduplication by default Signed-off-by: UtsavBalar1231 --- drivers/block/zram/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index 4923185dd988..13b6f8d8e841 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -58,7 +58,7 @@ config ZRAM_DEDUP bool "Deduplication support for ZRAM data" depends on ZRAM select XXHASH - default n + default y help Deduplicate ZRAM data to reduce amount of memory consumption. Advantage largely depends on the workload. 
In some cases, this From 57d91d685a7023853e3f11bcbce688f690c78343 Mon Sep 17 00:00:00 2001 From: UtsavBalar1231 Date: Tue, 31 May 2022 23:44:47 -0500 Subject: [PATCH 43/44] ARM64: configs: xiaomi: Remove LRU zRAM writeback and enable zRAM writeback Change-Id: I41c109de1338d821dc14cfcac4cb44b82c26fd48 Signed-off-by: UtsavBalar1231 Signed-off-by: Carlos Ayrton Lopez Arroyo <15030201@itcelaya.edu.mx> --- arch/arm64/configs/vendor/alioth_defconfig | 1 + arch/arm64/configs/vendor/apollo_defconfig | 1 + arch/arm64/configs/vendor/cmi_defconfig | 1 + arch/arm64/configs/vendor/lmi_defconfig | 1 + arch/arm64/configs/vendor/thyme_defconfig | 1 + arch/arm64/configs/vendor/umi_defconfig | 1 + 6 files changed, 6 insertions(+) diff --git a/arch/arm64/configs/vendor/alioth_defconfig b/arch/arm64/configs/vendor/alioth_defconfig index 83aaf54c70e5..85f3f852a4a8 100644 --- a/arch/arm64/configs/vendor/alioth_defconfig +++ b/arch/arm64/configs/vendor/alioth_defconfig @@ -333,6 +333,7 @@ CONFIG_MTD_OOPS=y CONFIG_MTD_BLOCK2MTD=y CONFIG_ZRAM=y CONFIG_ZRAM_SIZE_OVERRIDE=2 +CONFIG_ZRAM_WRITEBACK=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=16 CONFIG_BLK_DEV_RAM=y diff --git a/arch/arm64/configs/vendor/apollo_defconfig b/arch/arm64/configs/vendor/apollo_defconfig index 78f4e22bc2e4..f755924e8928 100644 --- a/arch/arm64/configs/vendor/apollo_defconfig +++ b/arch/arm64/configs/vendor/apollo_defconfig @@ -337,6 +337,7 @@ CONFIG_MTD_OOPS=y CONFIG_MTD_BLOCK2MTD=y CONFIG_ZRAM=y CONFIG_ZRAM_SIZE_OVERRIDE=2 +CONFIG_ZRAM_WRITEBACK=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=16 CONFIG_BLK_DEV_RAM=y diff --git a/arch/arm64/configs/vendor/cmi_defconfig b/arch/arm64/configs/vendor/cmi_defconfig index ee81b4151c56..69e98f0268e6 100644 --- a/arch/arm64/configs/vendor/cmi_defconfig +++ b/arch/arm64/configs/vendor/cmi_defconfig @@ -341,6 +341,7 @@ CONFIG_MTD_OOPS=y CONFIG_MTD_BLOCK2MTD=y CONFIG_ZRAM=y CONFIG_ZRAM_SIZE_OVERRIDE=2 +CONFIG_ZRAM_WRITEBACK=y CONFIG_BLK_DEV_LOOP=y 
CONFIG_BLK_DEV_LOOP_MIN_COUNT=16 CONFIG_BLK_DEV_RAM=y diff --git a/arch/arm64/configs/vendor/lmi_defconfig b/arch/arm64/configs/vendor/lmi_defconfig index 04ab85375397..9a15946046d2 100644 --- a/arch/arm64/configs/vendor/lmi_defconfig +++ b/arch/arm64/configs/vendor/lmi_defconfig @@ -341,6 +341,7 @@ CONFIG_MTD_OOPS=y CONFIG_MTD_BLOCK2MTD=y CONFIG_ZRAM=y CONFIG_ZRAM_SIZE_OVERRIDE=2 +CONFIG_ZRAM_WRITEBACK=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=16 CONFIG_BLK_DEV_RAM=y diff --git a/arch/arm64/configs/vendor/thyme_defconfig b/arch/arm64/configs/vendor/thyme_defconfig index 2495f1e3abdf..4e8dd6f576fc 100644 --- a/arch/arm64/configs/vendor/thyme_defconfig +++ b/arch/arm64/configs/vendor/thyme_defconfig @@ -334,6 +334,7 @@ CONFIG_MTD_OOPS=y CONFIG_MTD_BLOCK2MTD=y CONFIG_ZRAM=y CONFIG_ZRAM_SIZE_OVERRIDE=2 +CONFIG_ZRAM_WRITEBACK=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=16 CONFIG_BLK_DEV_RAM=y diff --git a/arch/arm64/configs/vendor/umi_defconfig b/arch/arm64/configs/vendor/umi_defconfig index 080364b2cf03..1e5a848363f5 100644 --- a/arch/arm64/configs/vendor/umi_defconfig +++ b/arch/arm64/configs/vendor/umi_defconfig @@ -340,6 +340,7 @@ CONFIG_MTD_OOPS=y CONFIG_MTD_BLOCK2MTD=y CONFIG_ZRAM=y CONFIG_ZRAM_SIZE_OVERRIDE=2 +CONFIG_ZRAM_WRITEBACK=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=16 CONFIG_BLK_DEV_RAM=y From efaad1e3c34b2ded3f3ae07a8313fb7931cc99fa Mon Sep 17 00:00:00 2001 From: Carlos Ayrton Lopez Arroyo <15030201@itcelaya.edu.mx> Date: Wed, 1 Jun 2022 00:06:55 -0500 Subject: [PATCH 44/44] Revert "mm: compaction: Run ZRAM compaction on automatic compaction" This reverts commit b746530f068a37d9aa58596e8d2ac293c8990b44. 
--- mm/compaction.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index 18e718dcf27b..2d6927f2bf21 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -2504,8 +2504,6 @@ static void compact_nodes(void) compact_node(nid); } -void zram_compact(void); - static void do_compaction(struct work_struct *work) { /* Return early if the screen is on */ @@ -2517,9 +2515,6 @@ static void do_compaction(struct work_struct *work) /* Do full compaction */ compact_nodes(); - /* Do ZRAM compaction */ - zram_compact(); - /* Force compaction timeout */ compaction_forced_timeout = jiffies + msecs_to_jiffies(compaction_timeout_ms);