From 40d9bbb083f1a148e1e35599ef64dad14ad27f3a Mon Sep 17 00:00:00 2001 From: Pengfei Xu Date: Thu, 5 Sep 2024 22:09:50 +0800 Subject: [PATCH] cet_ioctl: fix one BUG issue after unload cet_ioctl driver Unload cet_ioctl driver will meet BUG as following: " [ 203.429071] Will unload driver. [ 203.429076] Class pointer 'cl' is valid: 000000002a4ca71a, *cl:000000002a4ca71a [ 203.429078] Device pointer 'dev' is valid &dev:0000000045468ace, dev:532676608 [ 203.429244] BUG: unable to handle page fault for address: ffffc9000447fbd4 [ 203.429247] #PF: supervisor read access in kernel mode [ 203.429248] #PF: error_code(0x0000) - not-present page [ 203.429249] PGD 100000067 P4D 100000067 PUD 100285067 PMD 126d0b067 PTE 0 [ 203.429253] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 203.429256] CPU: 8 PID: 4469 Comm: rmmod Tainted: G O 6.7.0-rc7-asymmetric5+ #15 [ 203.429258] Hardware name: Gigabyte Technology Co., Ltd. X299 UD4 Pro/X299 UD4 Pro-CF, BIOS F8a 04/27/2021 [ 203.429259] RIP: 0010:string_nocheck+0x16/0x90 [ 203.429266] Code: 0f 1f 40 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 55 49 89 f2 48 89 ce 48 89 f8 48 c1 fe 30 48 89 e5 66 85 f6 74 5b <44> 0f b6 0a 45 84 c9 74 52 83 ee 01 41 b8 01 00 00 00 48 8d 7c 37 [ 203.429268] RSP: 0018:ffffc90003997c88 EFLAGS: 00010286 [ 203.429270] RAX: ffff888101f7e255 RBX: ffff888101f7ea1c RCX: ffff0a00ffffff04 [ 203.429271] RDX: ffffc9000447fbd4 RSI: ffffffffffffffff RDI: ffff888101f7e255 [ 203.429272] RBP: ffffc90003997c88 R08: 4554535953425553 R09: 3d4d455453595342 [ 203.429273] R10: ffff888101f7ea1c R11: 0000000000000000 R12: ffffc9000447fbd4 [ 203.429274] R13: ffff0a00ffffff04 R14: ffffffff98caa0f7 R15: ffff888101f7e255 [ 203.429275] FS: 00007f914eb9d740(0000) GS:ffff88a01fa00000(0000) knlGS:0000000000000000 [ 203.429276] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 203.429277] CR2: ffffc9000447fbd4 CR3: 00000001f3334002 CR4: 00000000003706f0 [ 203.429278] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 
203.429279] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 203.429280] Call Trace: [ 203.429281] [ 203.429283] ? show_regs+0x66/0x70 [ 203.429291] ? __die_body+0x23/0x70 [ 203.429293] ? __die+0x2e/0x40 [ 203.429295] ? page_fault_oops+0x160/0x480 [ 203.429299] ? search_bpf_extables+0x64/0x90 [ 203.429304] ? string_nocheck+0x16/0x90 [ 203.429305] ? search_exception_tables+0x61/0x70 [ 203.429310] ? kernelmode_fixup_or_oops+0xa2/0x120 [ 203.429311] ? __bad_area_nosemaphore+0x17a/0x230 [ 203.429312] ? bad_area_nosemaphore+0x16/0x20 [ 203.429314] ? do_kern_addr_fault+0x8e/0xa0 [ 203.429315] ? exc_page_fault+0x18b/0x190 [ 203.429319] ? asm_exc_page_fault+0x2b/0x30 [ 203.429323] ? string_nocheck+0x16/0x90 [ 203.429325] ? string_nocheck+0x6b/0x90 [ 203.429326] string+0x4c/0x60 [ 203.429328] vsnprintf+0x340/0x530 [ 203.429331] add_uevent_var+0x86/0x110 [ 203.429335] ? kobject_get_path+0x96/0x100 [ 203.429337] kobject_uevent_env+0x21e/0x7a0 [ 203.429339] ? preempt_count_add+0x80/0xc0 [ 203.429344] ? 
_raw_spin_lock_irqsave+0x2c/0x60 [ 203.429348] kobject_uevent+0xf/0x20 [ 203.429350] device_del+0x261/0x3a0 [ 203.429355] device_unregister+0x1b/0x60 [ 203.429357] device_destroy+0x4e/0x70 [ 203.429361] cet_ioctl_exit+0x5b/0xcc0 [cet_ioctl] [ 203.429365] __x64_sys_delete_module+0x140/0x240 [ 203.429369] do_syscall_64+0x42/0xf0 [ 203.429372] entry_SYSCALL_64_after_hwframe+0x6e/0x76 [ 203.429374] RIP: 0033:0x7f914d23a14b [ 203.429376] Code: 73 01 c3 48 8b 0d 3d 4d 38 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 0d 4d 38 00 f7 d8 64 89 01 48 [ 203.429377] RSP: 002b:00007ffd3dbae018 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 [ 203.429379] RAX: ffffffffffffffda RBX: 000055925af7d7b0 RCX: 00007f914d23a14b [ 203.429380] RDX: 000000000000000a RSI: 0000000000000800 RDI: 000055925af7d818 [ 203.429381] RBP: 0000000000000000 R08: 00007ffd3dbacf91 R09: 0000000000000000 [ 203.429382] R10: 00007f914d370840 R11: 0000000000000206 R12: 00007ffd3dbae230 [ 203.429383] R13: 00007ffd3dbae9b3 R14: 000055925af7d2a0 R15: 000055925af7d7b0 [ 203.429384] [ 203.429385] Modules linked in: cet_ioctl(O-) nf_conntrack_netlink xfrm_user xfrm_algo xt_addrtype br_netfilter xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp nft_compat x_tables nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables nfnetlink bridge stp llc nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs lockd grace fscache netfs overlay sunrpc intel_rapl_msr intel_rapl_common intel_uncore_frequency intel_uncore_frequency_common isst_if_common snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio nfit snd_hda_codec_hdmi x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_intel snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec kvm_intel snd_hda_core snd_hwdep snd_seq kvm snd_seq_device snd_pcm irqbypass snd_timer rapl ioatdma mei_me i2c_i801 snd joydev input_leds intel_wmi_thunderbolt 
intel_cstate wmi_bmof pcspkr mei soundcore dca i2c_smbus mxm_wmi mac_hid sch_fq_codel vmxmon(O) x86_cpu_cache(O) kmsi(O) dmap(O) amdgpu crct10dif_pclmul crc32_pclmul hid_microsoft [ 203.429433] ghash_clmulni_intel ff_memless drm_ttm_helper sha512_ssse3 drm_exec hid_generic drm_suballoc_helper sha256_ssse3 amdxcp usbkbd sha1_ssse3 wdat_wdt gpu_sched e1000e usbhid hid aesni_intel crypto_simd cryptd [ 203.429445] CR2: ffffc9000447fbd4 [ 203.429447] ---[ end trace 0000000000000000 ]--- " The mod_name string is allocated on the stack in function cet_ioctl_init, so that stack memory has already been released by the time cet_ioctl_exit runs, which triggers this BUG. Use the "cet_ioctl" string directly to solve this issue; it is also the "kernel" way, rather than using a static global. Also add more debug info to help locate driver issues. Suggested-by: Mika Westerberg Signed-off-by: Pengfei Xu --- BM/cet/cet_driver/cet_ioctl.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/BM/cet/cet_driver/cet_ioctl.c b/BM/cet/cet_driver/cet_ioctl.c index f1b8b971..17fca19c 100644 --- a/BM/cet/cet_driver/cet_ioctl.c +++ b/BM/cet/cet_driver/cet_ioctl.c @@ -30,11 +30,13 @@ static struct class *cl; static int my_open(struct inode *i, struct file *f) { + pr_info("Device opened\n"); return 0; } static int my_close(struct inode *i, struct file *f) { + pr_info("Device closed\n"); return 0; } @@ -155,14 +157,18 @@ static int __init cet_ioctl_init(void) pr_info("Load cet_ioctl start\n"); ret = alloc_chrdev_region(&dev, FIRST_MINOR, MINOR_CNT, mod_name); - if (ret < 0) + if (ret < 0) { + pr_info("alloc_chrdev_region failed\n"); return ret; + } cdev_init(&c_dev, &query_fops); ret = cdev_add(&c_dev, dev, MINOR_CNT); - if (ret < 0) + if (ret < 0) { + pr_info("cdev_add failed\n"); return ret; + } /* * From v6.3-rc1: dcfbb67e48a2becfce7990386e985b9c45098ee5, @@ -170,8 +176,12 @@ static int __init cet_ioctl_init(void) * If the host kernel version is lower than v6.3-rc1, 
please change * the code as follows: * cl = class_create(THIS_MODULE, "char"); + * mod_name is allocated on the stack, so it is released after this function + * returns; using it here causes a "BUG: unable to handle page fault for + * address: ffffc9000447fbd4" issue. So use a plain string instead, which is + * also the "kernel" way. */ - cl = class_create(mod_name); + cl = class_create("cet_ioctl"); if (IS_ERR(cl)) { cdev_del(&c_dev); unregister_chrdev_region(dev, MINOR_CNT); @@ -191,11 +201,23 @@ static int __init cet_ioctl_init(void) static void __exit cet_ioctl_exit(void) { - pr_info("Unload cet_ioctl, bye.\n"); + pr_info("Will unload driver.\n"); + if (!cl) + pr_err("Class pointer 'cl' is NULL\n"); + else + pr_info("Class pointer 'cl' is valid: cl:%p\n", (void *)cl); + + if (!dev) + pr_err("Device pointer 'dev' is NULL\n"); + else + pr_info("Device pointer 'dev' is valid &dev:%p, dev:%d\n", &dev, dev); + device_destroy(cl, dev); + pr_info("Device destroyed\n"); class_destroy(cl); cdev_del(&c_dev); unregister_chrdev_region(dev, MINOR_CNT); + pr_info("Driver uninstall completed.\n"); } module_init(cet_ioctl_init);