原创 写代码的篮球球痴 嵌入式Linux 2020-03-09

收录于话题

#Linux

130个

Linux内核有一些方法可以用来方便标记bug,提供断言并输出信息。最常用的两个是BUG()和BUG_ON()。

当被调用的时候,它们会引发oops,导致栈的回溯和错误信息的打印。这些宏引发oops的具体方式与硬件体系结构相关:大部分体系结构把BUG()和BUG_ON()定义成某种非法操作,这样自然会产生需要的oops。你可以把这些调用当作断言使用,用来断言某种情况不应该发生:

if (bad_thing)
BUG(); //需要linux 内核开启General setup->Configure standard kernel features->BUG() support

或者使用更好的形式:

BUG_ON(bad_thing); 

可以用panic()引发更严重的错误。调用panic()不但会打印错误消息(Oops)而且还会挂起整个系统。显然,你只应该在极端恶劣的情况下使用它:

if (terrible_thing)
       panic("foo is %ld\n", foo);  

有些时候,你只是需要在终端上打印一下栈的回溯信息来帮助你测试。此时可以使用dump_stack()。它只在终端上打印寄存器上下文和函数的跟踪线索:

if (!debug_check) {
       printk(KERN_DEBUG "provide some information...\n");
       dump_stack();
}
举个例子程序

这个例子是参考了别人的代码,我按照这个代码执行下给大伙看看。通过触发proc下的文件来触发不同的执行函数。

/*************************************************************************
       > File Name: pro.c
       > Author:
       > Mail:
       > Created Time: 2020年03月07日 星期六 11时19分38秒
************************************************************************/

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <asm/uaccess.h>
#include <asm/types.h>

// Test module: creates a proc entry whose write handler triggers
// BUG_ON(), BUG(), dump_stack() or panic() depending on the number written.

MODULE_LICENSE("GPL");
MODULE_AUTHOR("329410527@qq.com");

// Name of the proc entry; it is created with a NULL parent directory.
#define MY_PROC_ENTRY "bugon-test"

// Handle returned by proc_create_data() in create_new_proc_entry().
struct proc_dir_entry *proc;
// strlen() of the greeting stored in msg (terminating NUL not counted).
int len;
// Buffer allocated in create_new_proc_entry(); registered as the entry's private data.
char *msg = NULL;
#define DATA_SIZE 1024 // Size in bytes of the allocation behind msg.

/*
* Command codes understood by my_proc_write(): the integer written to the
* proc entry is compared against these values to pick the action.
*/

#define MY_BUG_ON 1
#define MY_BUG 2
#define MY_DUMPSTACK 3
#define MY_PANIC 4
// Last command value parsed by my_proc_write(); starts at 100, which matches no command.
static int param = 100;

/*
 * Write handler for the proc entry.
 *
 * Parses a decimal integer from the start of the user buffer, stores it in
 * param and triggers the matching action:
 *   MY_BUG_ON    -> BUG_ON()
 *   MY_BUG       -> BUG()
 *   MY_DUMPSTACK -> dump_stack()
 *   MY_PANIC     -> panic()
 * Any other value only logs a message.
 *
 * Returns count when a value was parsed (the whole write is consumed),
 * -EFAULT if the user buffer cannot be read, or -EINVAL if the data does
 * not start with an integer (this includes an empty write).
 */
static ssize_t my_proc_write(struct file *filp, const char __user * buffer, size_t count, loff_t *pos)
{
       /*
        * A small on-stack buffer is enough for any int and keeps the
        * allocation size out of the caller's control.
        */
       char str[32];
       size_t n = min_t(size_t, count, sizeof(str) - 1);
       int value;

       if (copy_from_user(str, buffer, n))
               return -EFAULT;
       /* sscanf() needs a terminated string; user data carries no NUL. */
       str[n] = '\0';

       /* Do not act on a stale param when nothing could be parsed. */
       if (sscanf(str, "%d", &value) != 1)
               return -EINVAL;

       param = value;
       printk("param has been set to %d\n", param);

       switch (param) {
       case MY_BUG_ON:
               /* Condition is always true on this branch. */
               BUG_ON(param == MY_BUG_ON);
               break;
       case MY_BUG:
               BUG();
               break;
       case MY_DUMPSTACK:
               dump_stack();
               break;
       case MY_PANIC:
               panic("I am panicking, Why? -- you told so");
               break;
       default:
               printk("unknow param...\n");
       }
       return count;
}
/*读proc文件*/
ssize_t my_proc_read(struct file *filp,char *buf,size_t count, loff_t *offp )
{
   int err;
   char *data = PDE_DATA(file_inode(filp));

   if ((int) (*offp) > len) {
       return 0;
   }
   printk(KERN_INFO "Reading the proc entry, len of the file is %d", len);

   if(!(data)) {
       printk(KERN_INFO "NULL DATA");
       return 0;
   }

   if (count == 0) {
       printk(KERN_INFO "Read of size zero, doing nothing.");
       return count;
   } else {
       printk(KERN_INFO "Read of size %d", (int) count);
   }

   count = len + 1; // +1 to read the \0
   err = copy_to_user(buf, data, count); // +1 for \0
   printk(KERN_INFO "Read data : %s", buf);
   *offp = count;

   if (err) {
       printk(KERN_INFO "Error in copying data.");
   } else {
       printk(KERN_INFO "Successfully copied data.");
   }

   return count;
}

/*
 * File operations for the proc entry: writes are handled by
 * my_proc_write(), reads by my_proc_read().
 * NOTE(review): newer kernels are reported to expect a struct proc_ops in
 * proc_create_data() instead - confirm against the target kernel version.
 */
struct file_operations proc_fops = {
   .write = my_proc_write,
   .read = my_proc_read,
};

int create_new_proc_entry(void) {
   int i;
   char *DATA = "Hello People";
   len = strlen(DATA);
   /*申请内存空间*/
   msg = kmalloc((size_t) DATA_SIZE, GFP_KERNEL); // +1 for \0
   if (msg != NULL) {
       printk(KERN_INFO "Allocated memory for msg");
   } else {
       return -1;
   }
   /*把字符串拷贝到msg*/
   strncpy(msg, DATA, len+1);
   for (i=0; i < len +1 ; i++) {
       printk(KERN_INFO "%c", msg[i]);
   }
   /*建立proc文件系统*/
   proc = proc_create_data(MY_PROC_ENTRY, 0666, NULL, &proc_fops, msg);
   if (proc) {
       return 0;
   }
   return -1;
}

/*
 * Module entry point: sets up the proc entry.
 *
 * Returns 0 on success. Both failure paths in create_new_proc_entry() are
 * failed allocations (the message buffer or the proc entry), so failure is
 * reported as -ENOMEM rather than a bare -1, which the module loader would
 * interpret as -EPERM.
 */
int __init proc_bug_on_init (void)
{
   if (create_new_proc_entry())
       return -ENOMEM;

   return 0;
}

/*
 * Module exit point: removes the proc entry, then releases the message
 * buffer that was registered as its private data.
 */
void __exit proc_bug_on_cleanup(void) {
   remove_proc_entry(MY_PROC_ENTRY, NULL);
   /* Free only after the entry is gone so no reader can still reach msg. */
   kfree(msg);
   msg = NULL;
}

/* Register the module's load and unload handlers. */
module_init(proc_bug_on_init);
module_exit(proc_bug_on_cleanup);

Makefile文件:

# Out-of-tree build wrapper for the pro.ko test module.
PWD  := $(shell pwd)
KVER := $(shell uname -r)
KDIR := /lib/modules/$(KVER)/build

# Printed every time this file is parsed, including when the kernel build
# system re-reads it, so the build log shows these values more than once.
$(info $(PWD))
$(info $(KVER))

ifneq ($(KERNELRELEASE),)
# Read by the kernel build system: only name the object to build.
obj-m := pro.o
else
# Invoked directly: delegate to the kernel build tree.
.PHONY: all install clean

all:
	$(MAKE) -C $(KDIR) M=$(PWD) modules
install:
	$(MAKE) -C $(KDIR) M=$(PWD) modules_install
clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean
endif

执行make命令后生成文件

weiqifa0@weiqifa-System-Product-Name:/ssd/weiqifa0/linux-c/pro-module$ make
/ssd/weiqifa0/linux-c/pro-module
5.0.0-23-generic
make -C /lib/modules/5.0.0-23-generic/build M=/ssd/weiqifa0/linux-c/pro-module modules
make[1]: Entering directory '/usr/src/linux-headers-5.0.0-23-generic'
/usr/src/linux-headers-5.0.0-23-generic
5.0.0-23-generic
 CC [M]  /ssd/weiqifa0/linux-c/pro-module/pro.o
 Building modules, stage 2.
/usr/src/linux-headers-5.0.0-23-generic
5.0.0-23-generic
 MODPOST 1 modules
 CC      /ssd/weiqifa0/linux-c/pro-module/pro.mod.o
 LD [M]  /ssd/weiqifa0/linux-c/pro-module/pro.ko
make[1]: Leaving directory '/usr/src/linux-headers-5.0.0-23-generic'
weiqifa0@weiqifa-System-Product-Name:/ssd/weiqifa0/linux-c/pro-module$

执行加载模块

sudo insmod pro.ko

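执行触发bugon操作(注意:重定向是由当前shell完成的,sudo对它不起作用;这里之所以能写入,是因为该proc文件的权限是0666。如果权限更严格,可以改用 echo 1 | sudo tee /proc/bugon-test): sudo echo 1 > /proc/bugon-test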

[930845.292938] ------------[ cut here ]------------
[930845.292939] kernel BUG at /ssd/weiqifa0/linux-c/pro-module/pro.c:57!
[930845.292942] invalid opcode: 0000 [#2] SMP NOPTI
[930845.292944] CPU: 2 PID: 12116 Comm: echo Tainted: G      D    OE     5.0.0-23-generic #24~18.04.1-Ubuntu
[930845.292944] Hardware name: System manufacturer System Product Name/PRIME Z370-P II, BIOS 0602 03/14/2019
[930845.292946] RIP: 0010:my_proc_write.cold.3+0x75/0x77 [pro]
[930845.292947] Code: 36 01 d6 eb 1d 0f 0b 83 f8 03 74 11 83 f8 04 75 e6 48 c7 c7 c0 f0 6e c0 e8 2c 8b fa d5 e8 66 2b 92 d6 48 89 d8 e9 8a fe ff ff <0f> 0b 48 c7 c7 76 f1 6e c0 31 db 49 c7 c4 9e f1 6e c0 e8 bf 36 01
[930845.292948] RSP: 0018:ffffb3e60a44fe50 EFLAGS: 00010246
[930845.292949] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 0000000000eec799
[930845.292949] RDX: 0000000000eec798 RSI: ffff907726aa7040 RDI: ffff907726403c80
[930845.292950] RBP: ffffb3e60a44fe68 R08: 0000000000027040 R09: ffffffffc06ee1c8
[930845.292950] R10: ffffd547606a6f80 R11: ffffb3e60a44fcc0 R12: ffff90771a9be310
[930845.292951] R13: 000055852def8410 R14: 000055852def8410 R15: ffff907683c1f300
[930845.292952] FS:  00007f6b9f9de580(0000) GS:ffff907726a80000(0000) knlGS:0000000000000000
[930845.292952] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[930845.292953] CR2: 00007f6b9f87f6f0 CR3: 000000014850a004 CR4: 00000000003606e0
[930845.292953] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[930845.292954] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[930845.292954] Call Trace:
[930845.292957]  proc_reg_write+0x3e/0x60
[930845.292959]  __vfs_write+0x1b/0x40
[930845.292960]  vfs_write+0xb1/0x1a0
[930845.292961]  ksys_write+0x5c/0xe0
[930845.292962]  __x64_sys_write+0x1a/0x20
[930845.292964]  do_syscall_64+0x5a/0x120
[930845.292966]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[930845.292967] RIP: 0033:0x7f6b9f8ff024
[930845.292968] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b3 0f 1f 80 00 00 00 00 48 8d 05 b9 d3 0d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53
[930845.292968] RSP: 002b:00007fff9b739518 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[930845.292969] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f6b9f8ff024
[930845.292970] RDX: 0000000000000002 RSI: 000055852def8410 RDI: 0000000000000001
[930845.292970] RBP: 000055852def8410 R08: 00007f6b9f9d9580 R09: 00007f6b9f9de580
[930845.292971] R10: 00007f6b9f9d6ca0 R11: 0000000000000246 R12: 00007f6b9f9d7760
[930845.292971] R13: 0000000000000002 R14: 00007f6b9f9d8560 R15: 00007f6b9f9d7960
[930845.292972] Modules linked in: pro(OE) tcp_diag inet_diag snd_hda_codec_realtek snd_hda_codec_generic amdgpu ledtrig_audio chash amd_iommu_v2 gpu_sched intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_hdmi aesni_intel snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi i915 snd_seq aes_x86_64 crypto_simd snd_seq_device cryptd glue_helper snd_timer kvmgt eeepc_wmi intel_cstate nls_iso8859_1 vfio_mdev asus_wmi intel_rapl_perf input_leds radeon wmi_bmof snd joydev sparse_keymap mxm_wmi mdev vfio_iommu_type1 ttm vfio soundcore kvm irqbypass drm_kms_helper drm mei_me i2c_algo_bit mei fb_sys_fops syscopyarea sysfillrect sysimgblt mac_hid acpi_pad sch_fq_codel parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid hid nvme r8169 ahci realtek nvme_core libahci wmi video [last unloaded: pro]
[930845.292992] ---[ end trace 622fbd2856be7806 ]---
[930845.292993] RIP: 0010:my_proc_write.cold.3+0x75/0x77 [pro]
[930845.292994] Code: 36 01 d6 eb 1d 0f 0b 83 f8 03 74 11 83 f8 04 75 e6 48 c7 c7 c0 f0 6e c0 e8 2c 8b fa d5 e8 66 2b 92 d6 48 89 d8 e9 8a fe ff ff <0f> 0b 48 c7 c7 76 f1 6e c0 31 db 49 c7 c4 9e f1 6e c0 e8 bf 36 01
[930845.292994] RSP: 0018:ffffb3e60893fe50 EFLAGS: 00010246
[930845.292995] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 0000000000e7affd
[930845.292996] RDX: 0000000000e7affc RSI: ffff907726ba7040 RDI: ffff907726403c80
[930845.292996] RBP: ffffb3e60893fe68 R08: 0000000000027040 R09: ffffffffc06ee1c8
[930845.292997] R10: ffffd547607934c0 R11: 0000000000000001 R12: ffff90771e4d37e8
[930845.292997] R13: 00005585c2683050 R14: 00005585c2683050 R15: ffff907721602200
[930845.292998] FS:  00007f6b9f9de580(0000) GS:ffff907726a80000(0000) knlGS:0000000000000000
[930845.292998] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[930845.292999] CR2: 00007f6b9f87f6f0 CR3: 000000014850a004 CR4: 00000000003606e0
[930845.292999] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[930845.293000] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400

其他的操作也是一样。

大家在调试过程中,可以试试这个方法。在自己的异常处理代码里加上这些调用后,一旦异常被其他调用触发,就可以从栈回溯中看到是谁的调用导致的。

我们看看BUG_ON()定义的位置

kernel/include/asm-generic/bug.h
/*
* Don't use BUG() or BUG_ON() unless there's really no way out; one
* example might be detecting data structure corruption in the middle
* of an operation that can't be backed out of.  If the (sub)system
* can somehow continue operating, perhaps with reduced functionality,
* it's probably not BUG-worthy.
*
* If you're tempted to BUG(), think again:  is completely giving up
* really the *only* solution?  There are usually better options, where
* users don't need to reboot ASAP and can mostly shut down cleanly.
*/
/*
 * Generic fallback, compiled only when HAVE_ARCH_BUG is not defined:
 * logs the file, line and function of the call site, then calls panic().
 */
#ifndef HAVE_ARCH_BUG
#define BUG() do { \
   printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
   barrier_before_unreachable(); \
   panic("BUG!"); \
} while (0)
#endif

/*
 * Generic fallback, compiled only when HAVE_ARCH_BUG_ON is not defined:
 * calls BUG() when the condition is true; the condition is hinted as unlikely.
 */
#ifndef HAVE_ARCH_BUG_ON
#define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0)
#endif

里面的注释写得很明白,如果你有其他的办法,建议不要使用BUG()和BUG_ON()。