简介

在计算机系统中,内存是以字节为单位编址的,每个地址单元都对应着一个字节,一个字节为 8bit。但是在C语言中除了8bit的char之外,还有16bit的short型,32bit的long型(要看具体的编译器)。另外,对于位数大于8位的处理器,例如16位或者32位的处理器,由于寄存器宽度大于一个字节,那么必然存在着如何安排多个字节存放顺序的问题,因此就产生了大端存储模式和小端存储模式。考虑一个short整数0xAF32(0x32是低位字节,0xAF是高位字节),把它赋值给一个short变量,那么它在内存中的存储可能有如下两种情况:

  • 大端字节(Big-endian):较高的有效字节存放在较低的存储器地址,较低的有效字节存放在较高的存储器地址。
  • 小端字节(Little-endian):较高的有效字节存放在较高的存储器地址,较低的有效字节存放在较低的存储器地址。
地址    0x2000  0x2001
        +-+-+-+-+-+-+-+-+
大端存储 |  0xAF |  0x32 |
        +-+-+-+-+-+-+-+-+
小端存储 |  0x32 |  0xAF |
        +-+-+-+-+-+-+-+-+

C函数判断大小端

判断计算机的存储方式:

/*
 * Report the byte order of the host.
 *
 * Stores the integer 1 and inspects the byte at its lowest address:
 * on a little-endian machine that byte holds the least significant
 * byte, i.e. 1.
 *
 * Returns 1 for little-endian, 0 otherwise.
 */
int is_little_endian()
{
	int probe = 1;
	/* Reading any object through a char pointer is always permitted. */
	const char *lowest_byte = (const char *)&probe;

	return lowest_byte[0] == 1;
}

大端模式和小端模式转换

#include<stdio.h>

/*
 * Integer aliases used by the byte-swap helpers below.
 * NOTE(review): the names assume unsigned int is 32 bits and
 * unsigned short is 16 bits on the target - confirm for your platform.
 */
typedef unsigned int   uint_32;
typedef unsigned short uint_16;

/*
 * BSWAP_16(x): exchange the two bytes of a 16-bit value.
 * The argument is expanded twice, so do not pass expressions with
 * side effects.
 */
#define BSWAP_16(x) \
    ((uint_16)(((((uint_16)(x)) & 0x00ffu) << 8) | \
               ((((uint_16)(x)) >> 8) & 0x00ffu)))

/*
 * BSWAP_32(x): reverse the four bytes of a 32-bit value.
 * Each term shifts one byte into its mirrored position and masks off
 * everything else. The argument is expanded four times.
 */
#define BSWAP_32(x) \
    ((uint_32)(((((uint_32)(x)) >> 24) & 0x000000ffu) | \
               ((((uint_32)(x)) >>  8) & 0x0000ff00u) | \
               ((((uint_32)(x)) <<  8) & 0x00ff0000u) | \
               ((((uint_32)(x)) << 24) & 0xff000000u)))

/*
 * Function form of the 16-bit byte swap: returns x with its low and
 * high bytes exchanged.
 */
uint_16 bswap_16(uint_16 x)
{
	const uint_16 low_byte  = x & 0x00ff;
	const uint_16 high_byte = (x & 0xff00) >> 8;

	/* The low byte moves up, the high byte moves down. */
	return (uint_16)((low_byte << 8) | high_byte);
}
/*
 * Function form of the 32-bit byte swap: returns x with its four bytes
 * in reverse order.
 */
uint_32 bswap_32(uint_32 x)
{
	/* Pull out each byte, numbered from least (b0) to most (b3) significant. */
	const uint_32 b0 = x & 0xff;
	const uint_32 b1 = (x >> 8) & 0xff;
	const uint_32 b2 = (x >> 16) & 0xff;
	const uint_32 b3 = (x >> 24) & 0xff;

	/* Reassemble them mirrored: b0 becomes the top byte, b3 the bottom. */
	return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
/*
 * Demo driver: prints the byte-swapped forms of 0x12345678 and 0x1234,
 * first through the BSWAP_* macros and then through the bswap_*
 * functions. Always returns 0.
 */
int main(int argc,char *argv[])
{
	/* The command line is not used by this demo. */
	(void)argc;
	(void)argv;

	/*
	 * %#x requires an unsigned int argument. The 16-bit results are
	 * promoted to (signed) int when passed through "...", so every
	 * argument is cast explicitly to match the conversion specifier.
	 */
	printf("------------带参宏-------------\n");
	printf("%#x\n",(unsigned int)BSWAP_32(0x12345678));
	printf("%#x\n",(unsigned int)BSWAP_16(0x1234)) ;
	printf("------------函数调用-----------\n");
	printf("%#x\n",(unsigned int)bswap_32(0x12345678));
	printf("%#x\n",(unsigned int)bswap_16(0x1234)) ;

	return 0 ;
}

标准库是如何识别大小端模式的

在系统头文件/usr/include/bits/endian.h中定义表示大小端的宏变量,如

# cat /usr/include/bits/endian.h 
/* i386/x86_64 are little-endian.  */

#ifndef _ENDIAN_H
# error "Never use <bits/endian.h> directly; include <endian.h> instead."
#endif

#define __BYTE_ORDER __LITTLE_ENDIAN

应用可以参考/usr/include/netinet/tcp.h中结构体的定义,如

/*
 * TCP header layout as quoted from <netinet/tcp.h>.
 * NOTE(review): this is an excerpt for illustration; the header
 * installed on a given system may differ - confirm before relying on it.
 *
 * The bit-fields between `ack_seq` and `window` are declared twice,
 * and __BYTE_ORDER selects which declaration is compiled. Both
 * branches contain the same fields with the same widths (16 bits in
 * total); only the declaration order differs.
 */
struct tcphdr
  {
    u_int16_t source;
    u_int16_t dest;
    u_int32_t seq;
    u_int32_t ack_seq;
#if __BYTE_ORDER == __LITTLE_ENDIAN
    /* Little-endian declaration order: res1/doff, then fin..urg, then res2. */
    u_int16_t res1:4;
    u_int16_t doff:4;
    u_int16_t fin:1;
    u_int16_t syn:1;
    u_int16_t rst:1;
    u_int16_t psh:1;
    u_int16_t ack:1;
    u_int16_t urg:1;
    u_int16_t res2:2;
#elif __BYTE_ORDER == __BIG_ENDIAN
    /* Big-endian declaration order: each 8-bit group above, reversed. */
    u_int16_t doff:4;
    u_int16_t res1:4;
    u_int16_t res2:2;
    u_int16_t urg:1;
    u_int16_t ack:1;
    u_int16_t psh:1;
    u_int16_t rst:1;
    u_int16_t syn:1;
    u_int16_t fin:1;
#else
/* Neither byte order matched: fail the build rather than guess a layout. */
#error "Adjust your <bits/endian.h> defines"
#endif
    u_int16_t window;
    u_int16_t check;
    u_int16_t urg_ptr;
};

两种模式的使用现状

Intel的80x86系列芯片一直坚持使用小端;ARM芯片默认采用小端,但可以切换为大端;而MIPS等芯片要么全部采用大端的方式储存,要么提供选项,可以在大小端之间切换。另外,对于大小端的处理也和编译器的实现有关:C语言本身并不规定字节序,通常跟随目标平台(常见的x86、ARM平台上是小端,但在一些对于单片机的实现中却是基于大端,比如Keil 51C);Java是平台无关的,默认是大端。在网络上传输数据普遍采用的都是大端(即网络字节序)。下面的例子用htonl/htons演示主机字节序到网络字节序的转换:

#include <arpa/inet.h>
#include <stdio.h>
/*
 * Two consecutive short members. Embedded in union U so the storage of
 * an int can be viewed as two shorts.
 * NOTE(review): which member overlays the low half of the int depends
 * on the host byte order - val1 sits at the lower address.
 */
struct ST{
    short val1;
    short val2;
};
/*
 * Overlays an int with a struct ST so the same bytes can be read either
 * as one int (val) or as two shorts (st.val1, st.val2).
 * NOTE(review): assumes sizeof(int) == 2 * sizeof(short) with no
 * padding inside struct ST - confirm on the target.
 */
union U{
    int val;
    struct ST st;
};
 
/*
 * Demo of host-to-network byte-order conversion.
 *
 * Stores 0x12345678 in a union U, prints the int and its two short
 * halves, then converts it in two ways:
 *   1. directly with htonl();
 *   2. by exchanging the two halves and applying htons() to each.
 * On a little-endian host with 16-bit short and 32-bit int, both
 * conversions print the same value.
 *
 * htonl() and htons() are declared in <arpa/inet.h>; calling them
 * without that header is an implicit declaration, which is invalid
 * since C99.
 *
 * Always returns 0.
 */
int main(void)
{
    int a = 0x12345678;
    union U u1, u2;

    u1.val = a;

    /* %x requires unsigned int, so every argument is cast to match. */
    printf("u1.val is 0x%x\n", (unsigned int)u1.val);
    printf("val1 is 0x%x\n", (unsigned int)u1.st.val1);
    printf("val2 is 0x%x\n", (unsigned int)u1.st.val2);
    printf("after first convert is: 0x%x\n", (unsigned int)htonl(u1.val));

    /* Swap the halves and byte-swap each one; both members of u2.st are
     * written before u2.val is read back. */
    u2.st.val2 = htons(u1.st.val1);
    u2.st.val1 = htons(u1.st.val2);
    printf("after second convert is: 0x%x\n", (unsigned int)u2.val);

    return 0;
}

shell命令判断大小端模式

  • dpkg-architecture命令
$ dpkg-architecture 
DEB_BUILD_ARCH=arm64
DEB_BUILD_ARCH_ABI=base
DEB_BUILD_ARCH_BITS=64
DEB_BUILD_ARCH_CPU=arm64
DEB_BUILD_ARCH_ENDIAN=little
DEB_BUILD_ARCH_LIBC=gnu
DEB_BUILD_ARCH_OS=linux
DEB_BUILD_GNU_CPU=aarch64
DEB_BUILD_GNU_SYSTEM=linux-gnu
DEB_BUILD_GNU_TYPE=aarch64-linux-gnu
DEB_BUILD_MULTIARCH=aarch64-linux-gnu
DEB_HOST_ARCH=arm64
DEB_HOST_ARCH_ABI=base
DEB_HOST_ARCH_BITS=64
DEB_HOST_ARCH_CPU=arm64
DEB_HOST_ARCH_ENDIAN=little
DEB_HOST_ARCH_LIBC=gnu
DEB_HOST_ARCH_OS=linux
DEB_HOST_GNU_CPU=aarch64
DEB_HOST_GNU_SYSTEM=linux-gnu
DEB_HOST_GNU_TYPE=aarch64-linux-gnu
DEB_HOST_MULTIARCH=aarch64-linux-gnu
DEB_TARGET_ARCH=arm64
DEB_TARGET_ARCH_ABI=base
DEB_TARGET_ARCH_BITS=64
DEB_TARGET_ARCH_CPU=arm64
DEB_TARGET_ARCH_ENDIAN=little
DEB_TARGET_ARCH_LIBC=gnu
DEB_TARGET_ARCH_OS=linux
DEB_TARGET_GNU_CPU=aarch64
DEB_TARGET_GNU_SYSTEM=linux-gnu
DEB_TARGET_GNU_TYPE=aarch64-linux-gnu
DEB_TARGET_MULTIARCH=aarch64-linux-gnu
  • lscpu命令
$ lscpu
Architecture:        aarch64
Byte Order:          Little Endian
CPU(s):              4
On-line CPU(s) list: 0-3
Thread(s) per core:  1
Core(s) per socket:  4
Socket(s):           1
Vendor ID:           ARM
Model:               4
Model name:          Cortex-A53
Stepping:            r0p4
CPU max MHz:         1296.0000
CPU min MHz:         408.0000
BogoMIPS:            48.00
L1d cache:           unknown size
L1i cache:           unknown size
L2 cache:            unknown size
Flags:               fp asimd evtstrm aes pmull sha1 sha2 crc32 cpuid