原创 写代码的篮球球痴 嵌入式Linux 2020-03-09
收录于话题
#Linux
130个
Linux内核有一些方法可以用来方便标记bug,提供断言并输出信息。最常用的两个是BUG()和BUG_ON()。
当被调用的时候,它们会引发oops,导致栈的回溯和错误信息的打印。这些调用具体如何引发oops与硬件的体系结构相关:大部分体系结构把BUG()和BUG_ON()定义成某种非法操作,这样自然会产生需要的oops。你可以把这些调用当作断言使用,用来断言某种情况不该发生:
if (bad_thing)
BUG(); //需要linux 内核开启General setup->Configure standard kernel features->BUG() support
或者使用更好的形式:
BUG_ON(bad_thing);
可以用panic()引发更严重的错误。调用panic()不但会打印错误消息(Oops)而且还会挂起整个系统。显然,你只应该在极端恶劣的情况下使用它:
if (terrible_thing)
panic("foo is %ld\n", foo);
有些时候,你只是需要在终端上打印一下栈的回溯信息来帮助你测试。此时可以使用dump_stack()。它只在终端上打印寄存器上下文和函数的跟踪线索:
if (!debug_check) {
printk(KERN_DEBUG "provide some information...\n");
dump_stack();
}
举个例子。这个例子程序参考了别人的代码,我按照这个代码执行一遍给大伙看看。通过写proc下的文件来触发不同的执行函数。
/*************************************************************************
> File Name: pro.c
> Author:
> Mail:
> Created Time: 2020年03月07日 星期六 11时19分38秒
************************************************************************/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <asm/uaccess.h>
#include <asm/types.h>
/*
 * Demo module: creates a proc entry whose read handler returns a stored
 * string and whose write handler accepts a numeric command.
 */
MODULE_AUTHOR("329410527@qq.com");
MODULE_LICENSE("GPL");

/* Name of the proc entry created by create_new_proc_entry(). */
#define MY_PROC_ENTRY "bugon-test"

/* Size in bytes of the buffer allocated for msg. */
#define DATA_SIZE 1024

/* Command codes matched against param in my_proc_write()'s switch. */
enum {
	MY_BUG_ON = 1,
	MY_BUG = 2,
	MY_DUMPSTACK = 3,
	MY_PANIC = 4,
};

/* Value returned by proc_create_data() for the proc entry. */
struct proc_dir_entry *proc;

/* Buffer handed to proc_create_data() as the entry's private data. */
char *msg = NULL;

/* strlen() of the string stored in msg (terminating NUL not counted). */
int len;

/* Last command value parsed by my_proc_write(). */
static int param = 100;
/*
 * Write handler for the proc entry.
 *
 * Parses a decimal integer from the user buffer into the file-level `param`
 * and runs the matching debug facility:
 *   MY_BUG_ON    -> BUG_ON(param)
 *   MY_BUG       -> BUG()
 *   MY_DUMPSTACK -> dump_stack()
 *   MY_PANIC     -> panic()
 * Any other value is only logged.
 *
 * Returns `count` when a number was parsed, -ENOMEM if the scratch buffer
 * cannot be allocated, -EFAULT if the user buffer cannot be read, and
 * -EINVAL if the input does not start with an integer.
 */
static ssize_t my_proc_write(struct file *filp, const char __user *buffer, size_t count, loff_t *pos)
{
	/* Only the leading integer matters, so cap the user-controlled size. */
	size_t n = count < DATA_SIZE ? count : DATA_SIZE - 1;
	char *str;
	int parsed;

	/* +1 leaves room for the terminator that sscanf() relies on. */
	str = kmalloc(n + 1, GFP_KERNEL);
	if (!str)
		return -ENOMEM;

	if (copy_from_user(str, buffer, n)) {
		kfree(str);
		return -EFAULT;
	}
	str[n] = '\0';

	parsed = sscanf(str, "%d", &param);
	kfree(str);

	/* Without this check a malformed write would re-run the previous command. */
	if (parsed != 1)
		return -EINVAL;

	printk("param has been set to %d\n", param);

	switch (param) {
	case MY_BUG_ON:
		BUG_ON(param);
		break;
	case MY_BUG:
		BUG();
		break;
	case MY_DUMPSTACK:
		dump_stack();
		break;
	case MY_PANIC:
		panic("I am panicking, Why? -- you told so");
		break;
	default:
		printk("unknow param...\n");
		break;
	}

	return count;
}
/*读proc文件*/
ssize_t my_proc_read(struct file *filp,char *buf,size_t count, loff_t *offp )
{
int err;
char *data = PDE_DATA(file_inode(filp));
if ((int) (*offp) > len) {
return 0;
}
printk(KERN_INFO "Reading the proc entry, len of the file is %d", len);
if(!(data)) {
printk(KERN_INFO "NULL DATA");
return 0;
}
if (count == 0) {
printk(KERN_INFO "Read of size zero, doing nothing.");
return count;
} else {
printk(KERN_INFO "Read of size %d", (int) count);
}
count = len + 1; // +1 to read the \0
err = copy_to_user(buf, data, count); // +1 for \0
printk(KERN_INFO "Read data : %s", buf);
*offp = count;
if (err) {
printk(KERN_INFO "Error in copying data.");
} else {
printk(KERN_INFO "Successfully copied data.");
}
return count;
}
/*
 * File operations for the proc entry: writes go to my_proc_write(),
 * reads go to my_proc_read().
 */
struct file_operations proc_fops = {
	.write = my_proc_write,
	.read  = my_proc_read,
};
int create_new_proc_entry(void) {
int i;
char *DATA = "Hello People";
len = strlen(DATA);
/*申请内存空间*/
msg = kmalloc((size_t) DATA_SIZE, GFP_KERNEL); // +1 for \0
if (msg != NULL) {
printk(KERN_INFO "Allocated memory for msg");
} else {
return -1;
}
/*把字符串拷贝到msg*/
strncpy(msg, DATA, len+1);
for (i=0; i < len +1 ; i++) {
printk(KERN_INFO "%c", msg[i]);
}
/*建立proc文件系统*/
proc = proc_create_data(MY_PROC_ENTRY, 0666, NULL, &proc_fops, msg);
if (proc) {
return 0;
}
return -1;
}
/*
 * Module entry point: creates the proc entry.
 *
 * Returns 0 on success. On failure it returns the non-zero value from
 * create_new_proc_entry() unchanged, so the caller sees the callee's error
 * instead of a fixed -1.
 */
int __init proc_bug_on_init(void)
{
	return create_new_proc_entry();
}
/*
 * Module exit point: removes the proc entry and releases the message buffer
 * that was attached to it.
 */
void __exit proc_bug_on_cleanup(void)
{
	/* Remove the entry first so it no longer refers to msg. */
	remove_proc_entry(MY_PROC_ENTRY, NULL);

	/* Without this the buffer would leak on every unload. */
	kfree(msg);
	msg = NULL;
}
/* Register proc_bug_on_init / proc_bug_on_cleanup as the module's init and exit functions. */
module_init(proc_bug_on_init);
module_exit(proc_bug_on_cleanup);
Makefile文件:
# Out-of-tree build of pro.ko against the headers of the running kernel.
PWD := $(shell pwd)
VER := $(shell uname -r)
KERNEL_BUILD := /lib/modules/$(VER)/build

# Printed on every parse, including the nested kbuild invocations.
$(info $(PWD))
$(info $(VER))

ifneq ($(KERNELRELEASE),)
# Invoked by kbuild: only name the object to build.
obj-m := pro.o
else
# Invoked directly: hand over to the kernel build system.
# $(MAKE) (not a literal "make") propagates flags and the jobserver.
.PHONY: all install clean

all:
	$(MAKE) -C $(KERNEL_BUILD) M=$(PWD) modules

install:
	$(MAKE) -C $(KERNEL_BUILD) M=$(PWD) modules_install

clean:
	$(MAKE) -C $(KERNEL_BUILD) M=$(PWD) clean
endif
执行make命令后生成文件
weiqifa0@weiqifa-System-Product-Name:/ssd/weiqifa0/linux-c/pro-module$ make
/ssd/weiqifa0/linux-c/pro-module
5.0.0-23-generic
make -C /lib/modules/5.0.0-23-generic/build M=/ssd/weiqifa0/linux-c/pro-module modules
make[1]: Entering directory '/usr/src/linux-headers-5.0.0-23-generic'
/usr/src/linux-headers-5.0.0-23-generic
5.0.0-23-generic
CC [M] /ssd/weiqifa0/linux-c/pro-module/pro.o
Building modules, stage 2.
/usr/src/linux-headers-5.0.0-23-generic
5.0.0-23-generic
MODPOST 1 modules
CC /ssd/weiqifa0/linux-c/pro-module/pro.mod.o
LD [M] /ssd/weiqifa0/linux-c/pro-module/pro.ko
make[1]: Leaving directory '/usr/src/linux-headers-5.0.0-23-generic'
weiqifa0@weiqifa-System-Product-Name:/ssd/weiqifa0/linux-c/pro-module$
执行加载模块
sudo insmod pro.ko
执行触发BUG_ON的操作: echo 1 | sudo tee /proc/bugon-test(注意: 原命令 sudo echo 1 > /proc/bugon-test 中的重定向是由当前shell完成的,sudo对它不起作用;这里能写成功是因为该节点的权限是0666)
[930845.292938] ------------[ cut here ]------------
[930845.292939] kernel BUG at /ssd/weiqifa0/linux-c/pro-module/pro.c:57!
[930845.292942] invalid opcode: 0000 [#2] SMP NOPTI
[930845.292944] CPU: 2 PID: 12116 Comm: echo Tainted: G D OE 5.0.0-23-generic #24~18.04.1-Ubuntu
[930845.292944] Hardware name: System manufacturer System Product Name/PRIME Z370-P II, BIOS 0602 03/14/2019
[930845.292946] RIP: 0010:my_proc_write.cold.3+0x75/0x77 [pro]
[930845.292947] Code: 36 01 d6 eb 1d 0f 0b 83 f8 03 74 11 83 f8 04 75 e6 48 c7 c7 c0 f0 6e c0 e8 2c 8b fa d5 e8 66 2b 92 d6 48 89 d8 e9 8a fe ff ff <0f> 0b 48 c7 c7 76 f1 6e c0 31 db 49 c7 c4 9e f1 6e c0 e8 bf 36 01
[930845.292948] RSP: 0018:ffffb3e60a44fe50 EFLAGS: 00010246
[930845.292949] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 0000000000eec799
[930845.292949] RDX: 0000000000eec798 RSI: ffff907726aa7040 RDI: ffff907726403c80
[930845.292950] RBP: ffffb3e60a44fe68 R08: 0000000000027040 R09: ffffffffc06ee1c8
[930845.292950] R10: ffffd547606a6f80 R11: ffffb3e60a44fcc0 R12: ffff90771a9be310
[930845.292951] R13: 000055852def8410 R14: 000055852def8410 R15: ffff907683c1f300
[930845.292952] FS: 00007f6b9f9de580(0000) GS:ffff907726a80000(0000) knlGS:0000000000000000
[930845.292952] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[930845.292953] CR2: 00007f6b9f87f6f0 CR3: 000000014850a004 CR4: 00000000003606e0
[930845.292953] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[930845.292954] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[930845.292954] Call Trace:
[930845.292957] proc_reg_write+0x3e/0x60
[930845.292959] __vfs_write+0x1b/0x40
[930845.292960] vfs_write+0xb1/0x1a0
[930845.292961] ksys_write+0x5c/0xe0
[930845.292962] __x64_sys_write+0x1a/0x20
[930845.292964] do_syscall_64+0x5a/0x120
[930845.292966] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[930845.292967] RIP: 0033:0x7f6b9f8ff024
[930845.292968] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b3 0f 1f 80 00 00 00 00 48 8d 05 b9 d3 0d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53
[930845.292968] RSP: 002b:00007fff9b739518 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[930845.292969] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f6b9f8ff024
[930845.292970] RDX: 0000000000000002 RSI: 000055852def8410 RDI: 0000000000000001
[930845.292970] RBP: 000055852def8410 R08: 00007f6b9f9d9580 R09: 00007f6b9f9de580
[930845.292971] R10: 00007f6b9f9d6ca0 R11: 0000000000000246 R12: 00007f6b9f9d7760
[930845.292971] R13: 0000000000000002 R14: 00007f6b9f9d8560 R15: 00007f6b9f9d7960
[930845.292972] Modules linked in: pro(OE) tcp_diag inet_diag snd_hda_codec_realtek snd_hda_codec_generic amdgpu ledtrig_audio chash amd_iommu_v2 gpu_sched intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_hdmi aesni_intel snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi i915 snd_seq aes_x86_64 crypto_simd snd_seq_device cryptd glue_helper snd_timer kvmgt eeepc_wmi intel_cstate nls_iso8859_1 vfio_mdev asus_wmi intel_rapl_perf input_leds radeon wmi_bmof snd joydev sparse_keymap mxm_wmi mdev vfio_iommu_type1 ttm vfio soundcore kvm irqbypass drm_kms_helper drm mei_me i2c_algo_bit mei fb_sys_fops syscopyarea sysfillrect sysimgblt mac_hid acpi_pad sch_fq_codel parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid hid nvme r8169 ahci realtek nvme_core libahci wmi video [last unloaded: pro]
[930845.292992] ---[ end trace 622fbd2856be7806 ]---
[930845.292993] RIP: 0010:my_proc_write.cold.3+0x75/0x77 [pro]
[930845.292994] Code: 36 01 d6 eb 1d 0f 0b 83 f8 03 74 11 83 f8 04 75 e6 48 c7 c7 c0 f0 6e c0 e8 2c 8b fa d5 e8 66 2b 92 d6 48 89 d8 e9 8a fe ff ff <0f> 0b 48 c7 c7 76 f1 6e c0 31 db 49 c7 c4 9e f1 6e c0 e8 bf 36 01
[930845.292994] RSP: 0018:ffffb3e60893fe50 EFLAGS: 00010246
[930845.292995] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 0000000000e7affd
[930845.292996] RDX: 0000000000e7affc RSI: ffff907726ba7040 RDI: ffff907726403c80
[930845.292996] RBP: ffffb3e60893fe68 R08: 0000000000027040 R09: ffffffffc06ee1c8
[930845.292997] R10: ffffd547607934c0 R11: 0000000000000001 R12: ffff90771e4d37e8
[930845.292997] R13: 00005585c2683050 R14: 00005585c2683050 R15: ffff907721602200
[930845.292998] FS: 00007f6b9f9de580(0000) GS:ffff907726a80000(0000) knlGS:0000000000000000
[930845.292998] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[930845.292999] CR2: 00007f6b9f87f6f0 CR3: 000000014850a004 CR4: 00000000003606e0
[930845.292999] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[930845.293000] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
其他的操作也是一样。
大家在调试过程中,可以试试这个方法:在自己的异常处理代码里加上这些调用后,如果异常是由其他调用路径触发的,就可以从栈回溯中看到是谁的调用导致的。
我们看看BUG_ON()定义的位置
kernel/include/asm-generic/bug.h
/*
* Don't use BUG() or BUG_ON() unless there's really no way out; one
* example might be detecting data structure corruption in the middle
* of an operation that can't be backed out of. If the (sub)system
* can somehow continue operating, perhaps with reduced functionality,
* it's probably not BUG-worthy.
*
* If you're tempted to BUG(), think again: is completely giving up
* really the *only* solution? There are usually better options, where
* users don't need to reboot ASAP and can mostly shut down cleanly.
*/
/*
 * Generic BUG(): compiled only when HAVE_ARCH_BUG is not defined.
 * Logs the file, line and function of the call site, then calls panic().
 */
#ifndef HAVE_ARCH_BUG
#define BUG() do { \
printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
barrier_before_unreachable(); \
panic("BUG!"); \
} while (0)
#endif
/*
 * Generic BUG_ON(): compiled only when HAVE_ARCH_BUG_ON is not defined.
 * Invokes BUG() when `condition` evaluates true. The test is wrapped in
 * unlikely(), presumably a hint that the condition is not expected to hold.
 */
#ifndef HAVE_ARCH_BUG_ON
#define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0)
#endif
里面的注释写的很明白,如果你有其他的办法,建议不要使用BUG_ON()。