Mach-O 是 Mach object 文件格式的缩写,也是 OS X 和 iOS 系统中的可执行文件格式,类似于 Linux 下的 ELF。除了可执行文件外,动态链接库、静态链接库等都是这种格式的。了解 Mach-O,也有助于我们更好地学习 iOS 逆向工程。

Mach-O 结构简单介绍

Mach-O 主要由三部分组成:Header、Load commands、Raw segment data,如下图所示:

Header保存了 Mach-O 的一些基本信息,包括了平台、文件类型、LoadCommands 的个数等等。

LoadCommands 紧跟在 Header 之后,其中的指令非常清晰地指示加载器如何设置并且加载二进制数据,加载 Mach-O 文件时会使用这里的数据来确定内存的分布。

Data 每一个 segment 的具体数据都保存在这里,这里包含了具体的代码、数据等等。

Mach-O 三部分结构详细介绍

我们可以用 otool 先来看看 iOS 中一个可执行文件的头信息,这里拿微信来看看,otool -h WeChat:

$ otool -h WeChat 
Mach header
magic cputype cpusubtype caps filetype ncmds sizeofcmds flags
0xfeedface 12 9 0x00 2 86 8192 0x00218085
Mach header
magic cputype cpusubtype caps filetype ncmds sizeofcmds flags
0xfeedfacf 16777228 0 0x00 2 86 8984 0x00218085

下面详细介绍下这都代表什么意思。

头信息的结构可以在 /usr/include/mach-o/loader.h 中查看

/*
* The 32-bit mach header appears at the very beginning of the object file for
* 32-bit architectures.
*/
struct mach_header {
uint32_t magic; /* mach magic number identifier */
cpu_type_t cputype; /* cpu specifier */
cpu_subtype_t cpusubtype; /* machine specifier */
uint32_t filetype; /* type of file */
uint32_t ncmds; /* number of load commands */
uint32_t sizeofcmds; /* the size of all the load commands */
uint32_t flags; /* flags */
};

/* Constant for the magic field of the mach_header (32-bit architectures) */
#define MH_MAGIC 0xfeedface /* the mach magic number */
#define MH_CIGAM 0xcefaedfe /* NXSwapInt(MH_MAGIC) */

/*
* The 64-bit mach header appears at the very beginning of object files for
* 64-bit architectures.
*/
struct mach_header_64 {
uint32_t magic; /* mach magic number identifier */
cpu_type_t cputype; /* cpu specifier */
cpu_subtype_t cpusubtype; /* machine specifier */
uint32_t filetype; /* type of file */
uint32_t ncmds; /* number of load commands */
uint32_t sizeofcmds; /* the size of all the load commands */
uint32_t flags; /* flags */
uint32_t reserved; /* reserved */
};

/* Constant for the magic field of the mach_header_64 (64-bit architectures) */
#define MH_MAGIC_64 0xfeedfacf /* the 64-bit mach magic number */
#define MH_CIGAM_64 0xcffaedfe /* NXSwapInt(MH_MAGIC_64) */

magic : 魔数
从上面的宏定义可以看出 0xfeedface 代表的是 32 位,0xfeedfacf 代表 64 位
cputype : cpu 的类型
在 /usr/include/mach/machine.h 中可以看到相关的定义

/*
* Machine types known by all.
*/

#define CPU_TYPE_ANY ((cpu_type_t) -1)

#define CPU_TYPE_VAX ((cpu_type_t) 1)
/* skip ((cpu_type_t) 2) */
/* skip ((cpu_type_t) 3) */
/* skip ((cpu_type_t) 4) */
/* skip ((cpu_type_t) 5) */
#define CPU_TYPE_MC680x0 ((cpu_type_t) 6)
#define CPU_TYPE_X86 ((cpu_type_t) 7)
#define CPU_TYPE_I386 CPU_TYPE_X86 /* compatibility */
#define CPU_TYPE_X86_64 (CPU_TYPE_X86 | CPU_ARCH_ABI64)

/* skip CPU_TYPE_MIPS ((cpu_type_t) 8) */
/* skip ((cpu_type_t) 9) */
#define CPU_TYPE_MC98000 ((cpu_type_t) 10)
#define CPU_TYPE_HPPA ((cpu_type_t) 11)
#define CPU_TYPE_ARM ((cpu_type_t) 12)
#define CPU_TYPE_ARM64 (CPU_TYPE_ARM | CPU_ARCH_ABI64)
#define CPU_TYPE_MC88000 ((cpu_type_t) 13)
#define CPU_TYPE_SPARC ((cpu_type_t) 14)
#define CPU_TYPE_I860 ((cpu_type_t) 15)
/* skip CPU_TYPE_ALPHA ((cpu_type_t) 16) */
/* skip ((cpu_type_t) 17) */
#define CPU_TYPE_POWERPC ((cpu_type_t) 18)
#define CPU_TYPE_POWERPC64 (CPU_TYPE_POWERPC | CPU_ARCH_ABI64)

cpusubtype cpu 的子类型
和 cputype 一样,也可以在 /usr/include/mach/machine.h 中看到相关的定义,其中有关于 PowerPC 的、MIPS 的等等,这里列一下关于 ARM 的:

...
...
...

/*
* ARM subtypes
*/
#define CPU_SUBTYPE_ARM_ALL ((cpu_subtype_t) 0)
#define CPU_SUBTYPE_ARM_V4T ((cpu_subtype_t) 5)
#define CPU_SUBTYPE_ARM_V6 ((cpu_subtype_t) 6)
#define CPU_SUBTYPE_ARM_V5TEJ ((cpu_subtype_t) 7)
#define CPU_SUBTYPE_ARM_XSCALE ((cpu_subtype_t) 8)
#define CPU_SUBTYPE_ARM_V7 ((cpu_subtype_t) 9)
#define CPU_SUBTYPE_ARM_V7F ((cpu_subtype_t) 10) /* Cortex A9 */
#define CPU_SUBTYPE_ARM_V7S ((cpu_subtype_t) 11) /* Swift */
#define CPU_SUBTYPE_ARM_V7K ((cpu_subtype_t) 12)
#define CPU_SUBTYPE_ARM_V6M ((cpu_subtype_t) 14) /* Not meant to be run under xnu */
#define CPU_SUBTYPE_ARM_V7M ((cpu_subtype_t) 15) /* Not meant to be run under xnu */
#define CPU_SUBTYPE_ARM_V7EM ((cpu_subtype_t) 16) /* Not meant to be run under xnu */
...
...
...

filetype 文件的类型
相关定义在 /usr/include/mach-o/loader.h 中

#define	MH_OBJECT	0x1		/* relocatable object file */
#define MH_EXECUTE 0x2 /* demand paged executable file */
#define MH_FVMLIB 0x3 /* fixed VM shared library file */
#define MH_CORE 0x4 /* core file */
#define MH_PRELOAD 0x5 /* preloaded executable file */
#define MH_DYLIB 0x6 /* dynamically bound shared library */
#define MH_DYLINKER 0x7 /* dynamic link editor */
#define MH_BUNDLE 0x8 /* dynamically bound bundle file */
#define MH_DYLIB_STUB 0x9 /* shared library stub for static */
...
...
...
常用的如下:
MH_OBJECT    编译过程中产生的 *.o 目标文件
MH_EXECUTE    可执行二进制文件
MH_DYLIB    动态库

ncmds 指的是加载命令 (load commands) 的数量
sizeofcmds 表示 load commands 的总字节大小
flags 一个包含一组位标志的整数,它显示了 Mach-O 文件格式的某些可选特性的状态。

加载命令

这些加载命令在 Mach-O 文件加载解析时,被内核加载器或者动态链接器读取,用来指导如何设置并加载对应的二进制数据段。加载命令的种类有很多,在 /usr/include/mach-o/loader.h 头文件中有简单的注释

struct load_command {
uint32_t cmd; /* type of load command */
uint32_t cmdsize; /* total size of command in bytes */
};

/*
* After MacOS X 10.1 when a new load command is added that is required to be
* understood by the dynamic linker for the image to execute properly the
* LC_REQ_DYLD bit will be or'ed into the load command constant. If the dynamic
* linker sees such a load command it it does not understand will issue a
* "unknown load command required for execution" error and refuse to use the
* image. Other load commands without this bit that are not understood will
* simply be ignored.
*/
#define LC_REQ_DYLD 0x80000000

/* Constants for the cmd field of all load commands, the type */
#define LC_SEGMENT 0x1 /* segment of this file to be mapped */
#define LC_SYMTAB 0x2 /* link-edit stab symbol table info */
#define LC_SYMSEG 0x3 /* link-edit gdb symbol table info (obsolete) */
#define LC_THREAD 0x4 /* thread */
#define LC_UNIXTHREAD 0x5 /* unix thread (includes a stack) */
#define LC_LOADFVMLIB 0x6 /* load a specified fixed VM shared library */
#define LC_IDFVMLIB 0x7 /* fixed VM shared library identification */
#define LC_IDENT 0x8 /* object identification info (obsolete) */
#define LC_FVMFILE 0x9 /* fixed VM file inclusion (internal use) */
#define LC_PREPAGE 0xa /* prepage command (internal use) */
#define LC_DYSYMTAB 0xb /* dynamic link-edit symbol table info */
#define LC_LOAD_DYLIB 0xc /* load a dynamically linked shared library */
...
...
...

LC_SYMTAB 符号表地址

LC_DYSYMTAB 动态符号表地址

LC_LOAD_DYLINKER 使用何种动态链接器 (例如 /usr/lib/dyld)

LC_UUID 文件的唯一标识

LC_VERSION_MIN_MACOSX 二进制文件要求的最低操作系统版本

LC_SOURCE_VERSION 构建该二进制文件使用的源代码版本

LC_MAIN 设置程序主线程的入口地址和栈大小

通过 otool -lv 来查看一下 WeChat

$ otool -lv WeChat 
WeChat (architecture armv7):
Mach header
magic cputype cpusubtype caps filetype ncmds sizeofcmds flags
MH_MAGIC ARM V7 0x00 EXECUTE 89 8292 NOUNDEFS DYLDLINK TWOLEVEL WEAK_DEFINES BINDS_TO_WEAK PIE
Load command 0
cmd LC_SEGMENT
cmdsize 56
segname __PAGEZERO
vmaddr 0x00000000
vmsize 0x00004000
fileoff 0
filesize 0
maxprot ---
initprot ---
nsects 0
flags (none)
Load command 1
cmd LC_SEGMENT
cmdsize 736
segname __TEXT
vmaddr 0x00004000
vmsize 0x034dc000
fileoff 0
filesize 55427072
maxprot r-x
initprot r-x
nsects 10
flags (none)
Section
sectname __text
segname __TEXT
addr 0x0000aff0
size 0x02d4bac4
offset 28656
align 2^4 (16)
reloff 0
nreloc 0
type S_REGULAR
attributes PURE_INSTRUCTIONS SOME_INSTRUCTIONS
reserved1 0
reserved2 0
...
...
...

其中 LC_SEGMENT / LC_SEGMENT_64 类型的 load command 的结构如下:

struct segment_command { /* for 32-bit architectures */
uint32_t cmd; /* LC_SEGMENT */
uint32_t cmdsize; /* includes sizeof section structs */
char segname[16]; /* segment name */
uint32_t vmaddr; /* memory address of this segment */
uint32_t vmsize; /* memory size of this segment */
uint32_t fileoff; /* file offset of this segment */
uint32_t filesize; /* amount to map from the file */
vm_prot_t maxprot; /* maximum VM protection */
vm_prot_t initprot; /* initial VM protection */
uint32_t nsects; /* number of sections in segment */
uint32_t flags; /* flags */
};

/*
* The 64-bit segment load command indicates that a part of this file is to be
* mapped into a 64-bit task's address space. If the 64-bit segment has
* sections then section_64 structures directly follow the 64-bit segment
* command and their size is reflected in cmdsize.
*/
struct segment_command_64 { /* for 64-bit architectures */
uint32_t cmd; /* LC_SEGMENT_64 */
uint32_t cmdsize; /* includes sizeof section_64 structs */
char segname[16]; /* segment name */
uint64_t vmaddr; /* memory address of this segment */
uint64_t vmsize; /* memory size of this segment */
uint64_t fileoff; /* file offset of this segment */
uint64_t filesize; /* amount to map from the file */
vm_prot_t maxprot; /* maximum VM protection */
vm_prot_t initprot; /* initial VM protection */
uint32_t nsects; /* number of sections in segment */
uint32_t flags; /* flags */
};

cmd 是 load command 的类型
cmdsize 代表 load command 的大小
segname 段名字
vmaddr 段的虚拟内存起始地址
vmsize 段的虚拟内存大小
fileoff 段在文件中的偏移量
filesize 段在文件中的大小
maxprot 段页面所需要的最高内存保护
initprot 段页面初始的内存保护
nsects 段中包含 section 的数量
flags 其他杂项标志位

Data

section 的结构如下:

struct section { /* for 32-bit architectures */
char sectname[16]; /* name of this section */
char segname[16]; /* segment this section goes in */
uint32_t addr; /* memory address of this section */
uint32_t size; /* size in bytes of this section */
uint32_t offset; /* file offset of this section */
uint32_t align; /* section alignment (power of 2) */
uint32_t reloff; /* file offset of relocation entries */
uint32_t nreloc; /* number of relocation entries */
uint32_t flags; /* flags (section type and attributes)*/
uint32_t reserved1; /* reserved (for offset or index) */
uint32_t reserved2; /* reserved (for count or sizeof) */
};

struct section_64 { /* for 64-bit architectures */
char sectname[16]; /* name of this section */
char segname[16]; /* segment this section goes in */
uint64_t addr; /* memory address of this section */
uint64_t size; /* size in bytes of this section */
uint32_t offset; /* file offset of this section */
uint32_t align; /* section alignment (power of 2) */
uint32_t reloff; /* file offset of relocation entries */
uint32_t nreloc; /* number of relocation entries */
uint32_t flags; /* flags (section type and attributes)*/
uint32_t reserved1; /* reserved (for offset or index) */
uint32_t reserved2; /* reserved (for count or sizeof) */
uint32_t reserved3; /* reserved */
};

sectname section 名
segname 该 section 所属的 segment 名
addr 该 section 在内存的起始位置
size 该 section 的大小
offset 该 section 的文件偏移
align 该 section 的对齐方式,以 2 的幂表示 (例如 4 表示按 2^4 = 16 字节对齐)
reloff 重定位入口的文件偏移
nreloc 需要重定位的入口数量
flags 包含 section 的 type 和 attributes

可以通过 otool -s 查看某个 segment 中的某个 section:

$ otool -s __TEXT __text WeChat
WeChat (architecture armv7):
Contents of (__TEXT,__text) section
0000aff0 af03b5f0 8d04f84d f6444606 f2c070fe
0000b000 f644304d 447871fc 314df2c0 46904479
0000b010 68096800 680c6800 8000f846 b9955935
0000b020 70e8f644 304df2c0 68004478 58306800
...
...
...

MachOView

除了用 otool 查看 Mach-O 外,还可以通过 MachOView 可视化工具来查看
MachOView下载地址:http://sourceforge.net/projects/machoview/
MachOView源码地址:https://github.com/gdbinit/MachOView
效果如下:


相关资料

Mach-O-File-Format