diff --git a/ELFInfo.cpp b/ELFInfo.cpp
index 76a13ef..a276e99 100644
--- a/ELFInfo.cpp
+++ b/ELFInfo.cpp
@@ -4,14 +4,16 @@
 #include
 #include
 #include
+#include
 
 #include "elfspy/Report.h"
 #include "elfspy/Fail.h"
+#include "elfspy/MFile.h"
+#include "elfspy/SectionHeader.h"
 
 #ifdef __x86_64__
 using Elf_Phdr = Elf64_Phdr;
 using Elf_Ehdr = Elf64_Ehdr;
-using Elf_Shdr = Elf64_Shdr;
 using Elf_Rel = Elf64_Rel;
 using Elf_Rela = Elf64_Rela;
 using Elf_Sym = Elf64_Sym;
@@ -22,7 +24,6 @@ using Elf_Sym = Elf64_Sym;
 #else
 using Elf_Phdr = Elf32_Phdr;
 using Elf_Ehdr = Elf32_Ehdr;
-using Elf_Shdr = Elf32_Shdr;
 using Elf_Rel = Elf32_Rel;
 using Elf_Rela = Elf32_Rela;
 using Elf_Sym = Elf32_Sym;
@@ -34,7 +35,6 @@ using Elf_Sym = Elf32_Sym;
 
 namespace
 {
-
 long page_size = sysconf(_SC_PAGESIZE);
 union Address
 {
@@ -57,57 +57,118 @@ inline void Address::round_up()
   value_ *= page_size;
 }
 
+// see if a library has a corresponding debug file where the symbols are kept
+std::string get_debug_file_name(const char* file_name)
+{
+  const char* slash = nullptr;
+  for (const char* seek = file_name; *seek; ++seek) {
+    if (*seek == '/') slash = seek;
+  }
+  if (!slash) return { };
+  const char* period = nullptr;
+  for (const char* seek = slash; *seek; ++seek) {
+    if (*seek == '.') {
+      period = seek;
+      break;
+    }
+  }
+  if (!period) return { }; // decided before opendir so no DIR* is left open
+  std::string name;
+  name = "/usr/lib/debug";
+  name.append(file_name, slash - file_name);
+  DIR* dir = opendir(name.c_str());
+  if (!dir) return { };
+  const char* base_name = slash + 1;
+  size_t name_root_len = period - base_name;
+  struct dirent* entry;
+  while ((entry = readdir(dir))) {
+    if (strncmp(entry->d_name, base_name, name_root_len) == 0) {
+      const char* after_root = entry->d_name + name_root_len;
+      // files in debug dir seem to be named libxxx-major.minor.so
+      if (*after_root == '-' || *after_root == '.') {
+        name += '/';
+        name += entry->d_name;
+        closedir(dir); // only after d_name is copied: entry belongs to dir
+        return name;
+      }
+    }
+  }
+  closedir(dir);
+  return { };
+}
+
 } // namespace
 
 namespace spy
 {
 
-ELFInfo::ELFInfo(unsigned char* data, const char* name)
+ELFInfo::ELFInfo(const char* name)
 {
-  data_ = data;
   name_ = name;
+  files_.emplace_back(std::make_unique(name));
+  std::string debug_file_name = get_debug_file_name(name);
+  if (!debug_file_name.empty()) {
+    files_.emplace_back(std::make_unique(debug_file_name.c_str()));
+  }
   if (!is_elf()) {
     Fail() << "Not ELF data - no ELF header found in " << name;
   }
 }
 
+ELFInfo::~ELFInfo()
+{
+}
+
+ELFObject ELFInfo::prepare_object(unsigned char* base) const
+{
+  ELFObject object;
+  object.name_ = name_;
+  object.base_ = base;
+  object.size_ = files_[0]->size();
+  unprotect(base);
+  return object;
+}
+
 bool ELFInfo::is_elf() const
 {
-  auto elf = reinterpret_cast(data_);
+  auto elf = reinterpret_cast(files_[0]->address());
   return elf->e_ident[EI_MAG0] == ELFMAG0
       && elf->e_ident[EI_MAG1] == ELFMAG1
       && elf->e_ident[EI_MAG2] == ELFMAG2
       && elf->e_ident[EI_MAG3] == ELFMAG3;
 }
 
-void* ELFInfo::find_section(const char* name)
+SectionHeader ELFInfo::find_header(const char* name) const
 {
-  auto elf = reinterpret_cast(data_);
-  auto header = data_ + elf->e_shoff;
-  auto section_names =
-      reinterpret_cast(header + elf->e_shstrndx * elf->e_shentsize);
-  auto names = reinterpret_cast(elf) + section_names->sh_offset;
-  // .shstrtab is the "section header" string table, it is indexed in elf header
-  for (size_t n = 0; n != elf->e_shnum; ++n, header += elf->e_shentsize) {
-    auto section = reinterpret_cast(header);
-    if (strcmp(names + section->sh_name, name) == 0) {
-      return section;
+  for (auto& file : files_) {
+    auto elf = reinterpret_cast(file->address());
+    auto header = file->address() + elf->e_shoff;
+    auto section_names =
+        reinterpret_cast(header + elf->e_shstrndx * elf->e_shentsize);
+    auto names = reinterpret_cast(elf) + section_names->sh_offset;
+    // .shstrtab is the "section header" string table
+    // it is indexed in elf header
+    for (size_t n = 0; n != elf->e_shnum; ++n, header += elf->e_shentsize) {
+      auto section = reinterpret_cast(header);
+      if (strcmp(names + section->sh_name, name) == 0) {
+        return { file->address(), section };
+      }
     }
   }
   if (strncmp(name_, "/lib", 4) != 0 && strncmp(name_, "/usr", 4) != 0) {
     Report() << "no " << name << " section found in " << name_;
   }
-  return nullptr;
+  return { };
 }
 
-void ELFInfo::unprotect(unsigned char* base, const char* name)
+void ELFInfo::unprotect(unsigned char* base, const char* name) const
 {
-  auto section = reinterpret_cast(find_section(name));
+  SectionHeader section = find_header(name);
   if (section) {
     Address begin;
     Address end;
-    begin.pointer_ = base + section->sh_addr;
-    end.pointer_ = begin.pointer_ + section->sh_size;
+    begin.pointer_ = base + section.header_->sh_addr;
+    end.pointer_ = begin.pointer_ + section.header_->sh_size;
     begin.round_down();
     end.round_up();
     size_t size = end.value_ - begin.value_;
@@ -118,38 +179,34 @@ void ELFInfo::unprotect(unsigned char* base, const char* name)
   }
 }
 
-void ELFInfo::unprotect(unsigned char* base)
+void ELFInfo::unprotect(unsigned char* base) const
 {
   // by default this memory is read only - ELFspy needs to change it
   unprotect(base, ".got");
   unprotect(base, ".got.plt");
 }
 
-ELFInfo::Symbol ELFInfo::get_symbol_rela(size_t value)
+const char* ELFInfo::find_name(size_t name_offset) const
+{
+  SectionHeader str_tab = find_header(".dynstr");
+  if (!str_tab) {
+    return nullptr;
+  }
+  return reinterpret_cast(str_tab.begin()) + name_offset;
+}
+
+ELFInfo::Symbol ELFInfo::get_symbol_rela(size_t value) const
 {
   Symbol result;
-  auto str_tab = reinterpret_cast(find_section(".dynstr"));
-  if (!str_tab) {
-    return result;
-  }
-  auto strings = reinterpret_cast(data_ + str_tab->sh_offset);
-  // find dynamic symbol table
-  auto symbol_table = reinterpret_cast(find_section(".dynsym"));
-  if (!symbol_table) {
-    return result;
-  }
   // find symbol by value in dynamic symbol table
   size_t index = 0;
-  auto symbols = data_ + symbol_table->sh_offset;
-  auto end = symbols + symbol_table->sh_size;
-  for ( ; symbols < end; symbols += symbol_table->sh_entsize) {
-    auto symbol = reinterpret_cast(symbols);
-    if (ELF_STTYPE(symbol->st_info) == STT_FUNC && symbol->st_value == value) {
+  for (auto& symbol : find_header(".dynsym").as_section()) {
+    if (ELF_STTYPE(symbol.st_info) == STT_FUNC && symbol.st_value == value) {
       // the symbol is defined in this file as 0 is undefined.
       // an STT_GNU_IFUNC will not match here as the function value will be the
       // the resulting function of the resolver function and therefore at a
       // different address
-      result.name_ = strings + symbol->st_name;
+      result.name_ = find_name(symbol.st_name);
       break;
     }
     ++index;
@@ -157,24 +214,16 @@ ELFInfo::Symbol ELFInfo::get_symbol_rela(size_t value)
   if (!result.name_) {
     return result; // not found
   }
-  auto rela_plt = reinterpret_cast(find_section(".rela.plt"));
-  if (!rela_plt) {
-    return result;
-  }
-  size_t rela_plt_entries = rela_plt->sh_size / rela_plt->sh_entsize;
-  unsigned char* rela = data_ + rela_plt->sh_offset;
-  // attempt to find symbol in .rela.plt
-  for (size_t n = 0; n != rela_plt_entries; ++n) {
-    auto reloc = reinterpret_cast(rela);
-    if (ELF_R_TYPE(reloc->r_info) == R_X86_64_JUMP_SLOT) {
+  // attempt to find symbol in .rela.plt using index from symbol table
+  for (auto& reloc : find_header(".rela.plt").as_section()) {
+    if (ELF_R_TYPE(reloc.r_info) == R_X86_64_JUMP_SLOT) {
       // find symbol by index
-      size_t symbol_index = ELF_R_SYM(reloc->r_info);
+      size_t symbol_index = ELF_R_SYM(reloc.r_info);
       if (symbol_index == index) {
-        result.rela_offset_ = reloc->r_offset;
+        result.rela_offset_ = reloc.r_offset;
         break;
       }
     }
-    rela += rela_plt->sh_entsize;
   }
   // a symbol will only be present in .rela.plt iff it was used in the ELF
   // object - otherwise only .dynsym and .symtab will contain it
@@ -185,84 +234,44 @@ ELFInfo::Symbol ELFInfo::get_symbol_rela(size_t value)
 
 // look for STT_IFUNC symbols by finding the function address in the relocated
 // .rela.plt entries. STT_IFUNC are rare, but time(time_t*) is one of them
-ELFInfo::Symbol ELFInfo::get_indirect_symbol_rela(unsigned char* base,
+ELFInfo::Symbol ELFInfo::get_indirect_symbol_rela(const unsigned char* base,
                                                   void* function)
 {
   Symbol result;
-  auto str_tab = reinterpret_cast(find_section(".dynstr"));
-  if (!str_tab) {
-    return result;
-  }
-  auto strings = reinterpret_cast(data_ + str_tab->sh_offset);
-  // find dynamic symbol table
-  auto symbol_table = reinterpret_cast(find_section(".dynsym"));
-  if (!symbol_table) {
-    return result;
-  }
-  auto symbols = data_ + symbol_table->sh_offset;
   // find function in relocated GOT
-  auto rela_plt = reinterpret_cast(find_section(".rela.plt"));
-  if (!rela_plt) {
-    return result;
-  }
-  size_t rela_plt_entries = rela_plt->sh_size / rela_plt->sh_entsize;
-  unsigned char* rela = data_ + rela_plt->sh_offset;
-  for (size_t n = 0; n != rela_plt_entries; ++n) {
-    auto reloc = reinterpret_cast(rela);
-    if (ELF_R_TYPE(reloc->r_info) == R_X86_64_IRELATIVE
-        && *reinterpret_cast(base + reloc->r_offset) == function) {
-      result.rela_offset_ = reloc->r_offset;
+  for (auto& reloc : find_header(".rela.plt").as_section()) {
+    if (ELF_R_TYPE(reloc.r_info) == R_X86_64_IRELATIVE &&
+        *reinterpret_cast(base + reloc.r_offset) == function) {
+      result.rela_offset_ = reloc.r_offset;
       // find symbol by r_addend in dynamic symbol table
-      auto end = symbols + symbol_table->sh_size;
-      for ( ; symbols < end; symbols += symbol_table->sh_entsize) {
-        auto symbol = reinterpret_cast(symbols);
-        if (ELF_STTYPE(symbol->st_info) == STT_GNU_IFUNC
-            && symbol->st_value == reloc->r_addend) {
-          result.name_ = strings + symbol->st_name;
+      for (auto& symbol : find_header(".dynsym").as_section()) {
+        if (ELF_STTYPE(symbol.st_info) == STT_GNU_IFUNC
+            && symbol.st_value == reloc.r_addend) {
+          result.name_ = find_name(symbol.st_name);
           break;
         }
       }
       break;
     }
-    rela += rela_plt->sh_entsize;
   }
   return result; // whether found or not
 }
 
 size_t ELFInfo::get_symbol_rela_dyn(const char* name)
 {
-  auto str_tab = reinterpret_cast(find_section(".dynstr"));
-  if (!str_tab) {
-    return 0;
-  }
-  auto strings = reinterpret_cast(data_ + str_tab->sh_offset);
   // find symbol table
-  auto symbol_table = reinterpret_cast(find_section(".dynsym"));
-  if (symbol_table == nullptr) {
-    return 0;
-  }
-  auto symbols = data_ + symbol_table->sh_offset;
-  size_t symbol_entries = symbol_table->sh_size / symbol_table->sh_entsize;
+  auto symbol_table = find_header(".dynsym").as_section();
   // find symbol in .rela.dyn
   for (const char* section : { ".rela.dyn", ".rela.plt" }) {
-    auto rela_dyn = reinterpret_cast(find_section(section));
-    if (!rela_dyn) {
-      return 0;
-    }
-    size_t rela_entries = rela_dyn->sh_size / rela_dyn->sh_entsize;
-    unsigned char* rela = data_ + rela_dyn->sh_offset;
-    for (size_t n = 0; n != rela_entries; ++n) {
-      auto reloc = reinterpret_cast(rela);
-      size_t symbol_index = ELF_R_SYM(reloc->r_info);
-      if (symbol_index < symbol_entries) {
-        symbol_index *= symbol_table->sh_entsize;
-        auto symbol = reinterpret_cast(symbols + symbol_index);
-        const char* symbol_name = strings + symbol->st_name;
-        if (strcmp(name, symbol_name) == 0) {
-          return reloc->r_offset;
+    for (auto& reloc : find_header(section).as_section()) {
+      size_t symbol_index = ELF_R_SYM(reloc.r_info);
+      if (symbol_index < symbol_table.entries()) {
+        auto& symbol = symbol_table[symbol_index];
+        const char* symbol_name = find_name(symbol.st_name);
+        if (symbol_name && strcmp(name, symbol_name) == 0) {
+          return reloc.r_offset;
         }
       }
-      rela += rela_dyn->sh_entsize;
     }
   }
   return 0;
@@ -274,34 +283,24 @@ ELFInfo::get_vtables(unsigned char* base)
   if (!is_elf()) {
     return { };
   }
-  auto elf = reinterpret_cast(data_);
-  auto header = data_ + elf->e_shoff;
   // get string table by finding the right strtab section
   // .dynstr is for DYNSYM section (it has a non-zero address)
   // .shstrtab is the "section header" string table, it is indexed in elf header
   // .strtab is for SYMTAB, which is what we want here
-  auto str_tab = reinterpret_cast(find_section(".strtab"));
+  SectionHeader str_tab = find_header(".strtab");
   if (!str_tab) {
     return { };
   }
-  auto strings = reinterpret_cast(data_ + str_tab->sh_offset);
-  // find symbol table
-  auto symbol_table = reinterpret_cast(find_section(".symtab"));
-  if (symbol_table == nullptr) {
-    return { };
-  }
+  auto strings = reinterpret_cast(str_tab.begin());
   std::unordered_map vtables;
-  // look for virtual tables
-  auto symbols = data_ + symbol_table->sh_offset;
-  auto end = symbols + symbol_table->sh_size;
-  for ( ; symbols < end; symbols += symbol_table->sh_entsize) {
-    auto symbol = reinterpret_cast(symbols);
+  // look for virtual tables in symbol table
+  for (auto& symbol : find_header(".symtab").as_section()) {
     // check if is defined (value != 0)
-    if (symbol->st_value && ELF_STTYPE(symbol->st_info) == STT_OBJECT) {
+    if (symbol.st_value && ELF_STTYPE(symbol.st_info) == STT_OBJECT) {
       // check if it is a virtual table (starts with "_ZTV")
-      auto name = strings + symbol->st_name;
+      auto name = strings + symbol.st_name;
       if (strncmp(name, "_ZTV", 4) == 0) {
-        vtables[name + 4] = base + symbol->st_value;
+        vtables[name + 4] = base + symbol.st_value;
       }
     }
   }
diff --git a/ELFInfo.h b/ELFInfo.h
index 6a94ff6..ad1cc55 100644
--- a/ELFInfo.h
+++ b/ELFInfo.h
@@ -1,12 +1,18 @@
 #ifndef ELFSPY_ELFINFO_H
 #define ELFSPY_ELFINFO_H
 
+#include
+#include
 #include
 #include
+
 #include "elfspy/ELFObject.h"
 
 namespace spy
 {
+class MFile;
+class ELFObject;
+class SectionHeader;
 
 /**
  * @namespace spy
@@ -14,14 +20,15 @@ namespace spy
  * can read data in ELF format and extract information from it
  */
 
-class ELFInfo : public ELFObject
+class ELFInfo
 {
 public:
   /**
-   * @param data ELF data
-   * @param name name of ELF data for diagnostics
+   * @param name file name of the ELF object, handed to MFile and used in
+   * diagnostics; a matching file under /usr/lib/debug is added when found
    */
-  ELFInfo(unsigned char* data, const char* name);
+  ELFInfo(const char* name);
+  ~ELFInfo();
 
   /// @return true if data is in ELF format
   bool is_elf() const;
@@ -35,14 +42,14 @@ public:
    * @param value symbol offset value expected to be in symtab
    * @return name and offset (empty if not defined in file)
    */
-  Symbol get_symbol_rela(size_t value);
+  Symbol get_symbol_rela(size_t value) const;
   /**
    * find symbol and offset name in .rela.plt if symbol is defined as STT_IFUNC
    * @param base base address of ELF object in memory
    * @param function function pointer
    * @return name and offset (0 if not defined in file)
    */
-  Symbol get_indirect_symbol_rela(unsigned char* base, void* function);
+  Symbol get_indirect_symbol_rela(const unsigned char* base, void* function);
   /**
    * find symbol offset in .rela.dyn if symbol is found as undefined in file
    * @param name symbol name
@@ -53,7 +60,7 @@ public:
    * remove write protection from the areas in memory that need to change
    * @param base base address in memory
    */
-  void unprotect(unsigned char* base);
+  void unprotect(unsigned char* base) const;
   /**
-   * find vtable adresses from symbol table
+   * find vtable addresses from symbol table
    * @param base - offset to return addresses relative to
@@ -62,10 +69,14 @@ public:
   std::unordered_map
   get_vtables(unsigned char* base = nullptr);
 
+  ELFObject prepare_object(unsigned char* base) const;
+
 private:
-  unsigned char* data_;
-  void* find_section(const char* name);
-  void unprotect(unsigned char* base, const char* name);
+  const char* name_;
+  std::vector> files_;
+  SectionHeader find_header(const char* name) const;
+  const char* find_name(size_t name_offset) const;
+  void unprotect(unsigned char* base, const char* name) const;
 };
 
 } // namespace spy