Skip to content

Commit

Permalink
cet_ioctl: fix one BUG issue after unload cet_ioctl driver
Browse files Browse the repository at this point in the history
Unloading the cet_ioctl driver triggers the following BUG:
"
[  203.429071] Will unload driver.
[  203.429076] Class pointer 'cl' is valid: 000000002a4ca71a, *cl:000000002a4ca71a
[  203.429078] Device pointer 'dev' is valid &dev:0000000045468ace, dev:532676608
[  203.429244] BUG: unable to handle page fault for address: ffffc9000447fbd4
[  203.429247] #PF: supervisor read access in kernel mode
[  203.429248] #PF: error_code(0x0000) - not-present page
[  203.429249] PGD 100000067 P4D 100000067 PUD 100285067 PMD 126d0b067 PTE 0
[  203.429253] Oops: 0000 [#1] PREEMPT SMP NOPTI
[  203.429256] CPU: 8 PID: 4469 Comm: rmmod Tainted: G           O       6.7.0-rc7-asymmetric5+ #15
[  203.429258] Hardware name: Gigabyte Technology Co., Ltd. X299 UD4 Pro/X299 UD4 Pro-CF, BIOS F8a 04/27/2021
[  203.429259] RIP: 0010:string_nocheck+0x16/0x90
[  203.429266] Code: 0f 1f 40 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 55 49 89 f2 48 89 ce 48 89 f8 48 c1 fe 30 48 89 e5 66 85 f6 74 5b <44> 0f b6 0a 45 84 c9 74 52 83 ee 01 41 b8 01 00 00 00 48 8d 7c 37
[  203.429268] RSP: 0018:ffffc90003997c88 EFLAGS: 00010286
[  203.429270] RAX: ffff888101f7e255 RBX: ffff888101f7ea1c RCX: ffff0a00ffffff04
[  203.429271] RDX: ffffc9000447fbd4 RSI: ffffffffffffffff RDI: ffff888101f7e255
[  203.429272] RBP: ffffc90003997c88 R08: 4554535953425553 R09: 3d4d455453595342
[  203.429273] R10: ffff888101f7ea1c R11: 0000000000000000 R12: ffffc9000447fbd4
[  203.429274] R13: ffff0a00ffffff04 R14: ffffffff98caa0f7 R15: ffff888101f7e255
[  203.429275] FS:  00007f914eb9d740(0000) GS:ffff88a01fa00000(0000) knlGS:0000000000000000
[  203.429276] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  203.429277] CR2: ffffc9000447fbd4 CR3: 00000001f3334002 CR4: 00000000003706f0
[  203.429278] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  203.429279] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  203.429280] Call Trace:
[  203.429281]  <TASK>
[  203.429283]  ? show_regs+0x66/0x70
[  203.429291]  ? __die_body+0x23/0x70
[  203.429293]  ? __die+0x2e/0x40
[  203.429295]  ? page_fault_oops+0x160/0x480
[  203.429299]  ? search_bpf_extables+0x64/0x90
[  203.429304]  ? string_nocheck+0x16/0x90
[  203.429305]  ? search_exception_tables+0x61/0x70
[  203.429310]  ? kernelmode_fixup_or_oops+0xa2/0x120
[  203.429311]  ? __bad_area_nosemaphore+0x17a/0x230
[  203.429312]  ? bad_area_nosemaphore+0x16/0x20
[  203.429314]  ? do_kern_addr_fault+0x8e/0xa0
[  203.429315]  ? exc_page_fault+0x18b/0x190
[  203.429319]  ? asm_exc_page_fault+0x2b/0x30
[  203.429323]  ? string_nocheck+0x16/0x90
[  203.429325]  ? string_nocheck+0x6b/0x90
[  203.429326]  string+0x4c/0x60
[  203.429328]  vsnprintf+0x340/0x530
[  203.429331]  add_uevent_var+0x86/0x110
[  203.429335]  ? kobject_get_path+0x96/0x100
[  203.429337]  kobject_uevent_env+0x21e/0x7a0
[  203.429339]  ? preempt_count_add+0x80/0xc0
[  203.429344]  ? _raw_spin_lock_irqsave+0x2c/0x60
[  203.429348]  kobject_uevent+0xf/0x20
[  203.429350]  device_del+0x261/0x3a0
[  203.429355]  device_unregister+0x1b/0x60
[  203.429357]  device_destroy+0x4e/0x70
[  203.429361]  cet_ioctl_exit+0x5b/0xcc0 [cet_ioctl]
[  203.429365]  __x64_sys_delete_module+0x140/0x240
[  203.429369]  do_syscall_64+0x42/0xf0
[  203.429372]  entry_SYSCALL_64_after_hwframe+0x6e/0x76
[  203.429374] RIP: 0033:0x7f914d23a14b
[  203.429376] Code: 73 01 c3 48 8b 0d 3d 4d 38 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 0d 4d 38 00 f7 d8 64 89 01 48
[  203.429377] RSP: 002b:00007ffd3dbae018 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
[  203.429379] RAX: ffffffffffffffda RBX: 000055925af7d7b0 RCX: 00007f914d23a14b
[  203.429380] RDX: 000000000000000a RSI: 0000000000000800 RDI: 000055925af7d818
[  203.429381] RBP: 0000000000000000 R08: 00007ffd3dbacf91 R09: 0000000000000000
[  203.429382] R10: 00007f914d370840 R11: 0000000000000206 R12: 00007ffd3dbae230
[  203.429383] R13: 00007ffd3dbae9b3 R14: 000055925af7d2a0 R15: 000055925af7d7b0
[  203.429384]  </TASK>
[  203.429385] Modules linked in: cet_ioctl(O-) nf_conntrack_netlink xfrm_user xfrm_algo xt_addrtype br_netfilter xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp nft_compat x_tables nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables nfnetlink bridge stp llc nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs lockd grace fscache netfs overlay sunrpc intel_rapl_msr intel_rapl_common intel_uncore_frequency intel_uncore_frequency_common isst_if_common snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio nfit snd_hda_codec_hdmi x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_intel snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec kvm_intel snd_hda_core snd_hwdep snd_seq kvm snd_seq_device snd_pcm irqbypass snd_timer rapl ioatdma mei_me i2c_i801 snd joydev input_leds intel_wmi_thunderbolt intel_cstate wmi_bmof pcspkr mei soundcore dca i2c_smbus mxm_wmi mac_hid sch_fq_codel vmxmon(O) x86_cpu_cache(O) kmsi(O) dmap(O) amdgpu crct10dif_pclmul crc32_pclmul hid_microsoft
[  203.429433]  ghash_clmulni_intel ff_memless drm_ttm_helper sha512_ssse3 drm_exec hid_generic drm_suballoc_helper sha256_ssse3 amdxcp usbkbd sha1_ssse3 wdat_wdt gpu_sched e1000e usbhid hid aesni_intel crypto_simd cryptd
[  203.429445] CR2: ffffc9000447fbd4
[  203.429447] ---[ end trace 0000000000000000 ]---
"
Because the mod_name string is allocated on the stack in cet_ioctl_init(),
its storage has already been released by the time cet_ioctl_exit() runs,
which triggers the BUG above. Pass the "cet_ioctl" string literal directly
to solve this issue; this is also the "kernel" way, as opposed to using a
static global.
Also add more debug info to help locate driver issues.

Suggested-by: Mika Westerberg <[email protected]>
Signed-off-by: Pengfei Xu <[email protected]>
  • Loading branch information
xupengfe authored and ysun committed Sep 6, 2024
1 parent de35c97 commit 0ee6842
Showing 1 changed file with 26 additions and 4 deletions.
30 changes: 26 additions & 4 deletions BM/cet/cet_driver/cet_ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,13 @@ static struct class *cl;

/*
 * Log that the device was opened and report success.
 *
 * Neither argument is used. Presumably installed as the open callback of
 * the driver's file_operations; the table itself is not visible here.
 *
 * Return: always 0.
 */
static int my_open(struct inode *i, struct file *f)
{
	pr_info("Device opened\n");

	return 0;
}

/*
 * Log that the device was closed and report success.
 *
 * Neither argument is used. Presumably installed as the release callback of
 * the driver's file_operations; the table itself is not visible here.
 *
 * Return: always 0.
 */
static int my_close(struct inode *i, struct file *f)
{
	pr_info("Device closed\n");

	return 0;
}

Expand Down Expand Up @@ -155,23 +157,31 @@ static int __init cet_ioctl_init(void)

pr_info("Load cet_ioctl start\n");
ret = alloc_chrdev_region(&dev, FIRST_MINOR, MINOR_CNT, mod_name);
if (ret < 0)
if (ret < 0) {
pr_info("alloc_chrdev_region failed\n");
return ret;
}

cdev_init(&c_dev, &query_fops);

ret = cdev_add(&c_dev, dev, MINOR_CNT);
if (ret < 0)
if (ret < 0) {
pr_info("cdev_add failed\n");
return ret;
}

/*
* From v6.3-rc1: dcfbb67e48a2becfce7990386e985b9c45098ee5,
* there is no second parameter for class_create(const char *name).
* If the host kernel version is lower than v6.3-rc1, please change
* the code as follows:
* cl = class_create(THIS_MODULE, "char");
* mod_name is allocated on the stack, so it is released when this
* function returns. Passing it to class_create() causes "BUG: unable to
* handle page fault for address: ffffc9000447fbd4" on unload, so use a
* plain string literal instead, which is also the "kernel" way.
*/
cl = class_create(mod_name);
cl = class_create("cet_ioctl");
if (IS_ERR(cl)) {
cdev_del(&c_dev);
unregister_chrdev_region(dev, MINOR_CNT);
Expand All @@ -191,11 +201,23 @@ static int __init cet_ioctl_init(void)

/*
 * Module exit handler: log the state of the file-scope 'cl' and 'dev',
 * then tear the driver down and log progress along the way.
 *
 * Teardown order: device node, class, cdev, char device region. The part
 * of cet_ioctl_init() visible in this file sets up the region, the cdev
 * and the class in that order, so this undoes them in reverse.
 *
 * The NULL/zero checks are purely diagnostic: teardown is attempted
 * regardless of their outcome.
 */
static void __exit cet_ioctl_exit(void)
{
	pr_info("Unload cet_ioctl, bye.\n");
	pr_info("Will unload driver.\n");

	/* %p output is hashed by printk, so this only shows non-NULL-ness. */
	if (!cl)
		pr_err("Class pointer 'cl' is NULL\n");
	else
		pr_info("Class pointer 'cl' is valid: cl:%p\n", cl);

	/*
	 * 'dev' is the device number handed to alloc_chrdev_region() and
	 * unregister_chrdev_region(), i.e. an unsigned dev_t rather than a
	 * pointer, so it is printed with %u instead of %d.
	 */
	if (!dev)
		pr_err("Device pointer 'dev' is NULL\n");
	else
		pr_info("Device pointer 'dev' is valid &dev:%p, dev:%u\n", &dev, dev);

	device_destroy(cl, dev);
	pr_info("Device destroyed\n");

	class_destroy(cl);
	cdev_del(&c_dev);
	unregister_chrdev_region(dev, MINOR_CNT);

	pr_info("Driver uninstall completed.\n");
}

module_init(cet_ioctl_init);
Expand Down

0 comments on commit 0ee6842

Please sign in to comment.