From 3864104e672e228beee4c26132eb14ce106e5279 Mon Sep 17 00:00:00 2001 From: Minecon724 Date: Sat, 19 Oct 2024 11:50:02 +0200 Subject: [PATCH] ELF support and other stuff not working still --- Makefile | 4 +- README.md | 29 ++++++++ README.txt | 23 ------ include/elf_program_loader.h | 8 ++ programs/return.elf | Bin 0 -> 4860 bytes programs/return2.bin | Bin 184 -> 0 bytes programs/return2.elf | Bin 0 -> 4840 bytes src/elf_program_loader.c | 137 +++++++++++++++++++++++++++++++++++ src/instruction_executor.c | 2 +- src/main.c | 25 +++++-- src/program_loader.c | 25 ++++++- 11 files changed, 219 insertions(+), 34 deletions(-) create mode 100644 README.md delete mode 100644 README.txt create mode 100644 include/elf_program_loader.h create mode 100755 programs/return.elf delete mode 100755 programs/return2.bin create mode 100755 programs/return2.elf create mode 100644 src/elf_program_loader.c diff --git a/Makefile b/Makefile index a10a0f1..8bebe20 100644 --- a/Makefile +++ b/Makefile @@ -9,9 +9,9 @@ CC := gcc # -O3: Optimize code (level 3) # -Wno-unused-variable: Disable warnings for unused variables ifeq ($(DEBUG),1) - CFLAGS := -Wall -Wextra -std=gnu23 -I include -O0 -g + CFLAGS := -Wall -Wextra -std=gnu23 -I include -O0 -g -lelf else - CFLAGS := -Wall -Wextra -std=gnu23 -I include -O3 + CFLAGS := -Wall -Wextra -std=gnu23 -I include -O3 -lelf endif # Directory for build outputs diff --git a/README.md b/README.md new file mode 100644 index 0000000..f67b264 --- /dev/null +++ b/README.md @@ -0,0 +1,29 @@ +RISC-V (rv32i) emulator in C \ +This is just for me to understand how all this works, and to learn something new. \ +So don't use it. + +Example programs: +- `return.bin` returns -1094647826 (puts it to register 10) +- `return2.elf` returns a different value with more steps. 
if it returns DADD, something's wrong + +Emulator exit codes: +- `-1` - reached end of address space +- `0` - never happens +- `1` - invalid opcode +- `2` - illegal instruction argument (like funct3) + +Compile: +0. Requirements: `libelf` +1. `make` - should display no warnings +2. Executable is `build/criscv` + +Compile programs: +0. Get the toolchain obviously +1. ``` + riscv32-unknown-elf-gcc -ffreestanding -nostdlib -Ttext=0x0 -e main -O0 -o program.elf program.c + ``` +2. `program.elf` is the ELF file with the program, pass it as an argument + +rv32i, ilp32 compatible toolchain for 64bit Linux: https://lfs.m724.eu/toolchain.tar.zst `adaa74f263dcba430da588b1109bc3b90bd90a84c67b06213bd03a7bbacd1a2a` \ +Or just the stuff necessary to make a binary file: https://lfs.m724.eu/toolchainlite.tar.zst `55e79dff7ba4093dedb8151461508fc157525ad89615d49d737845af03d1643f` \ +Those were compiled with `./configure --prefix=$(pwd)/../toolchain --with-arch=rv32i --with-abi=ilp32` and `make` \ No newline at end of file diff --git a/README.txt b/README.txt deleted file mode 100644 index 0476d2d..0000000 --- a/README.txt +++ /dev/null @@ -1,23 +0,0 @@ -RISC-V (rv32i) emulator in C -This is just for me to understand how all this works, and to learn something new. -So don't use it. - -Example programs: -- return.bin: returns -1094647826 (puts it to register 10) -- return2.bin: returns a different value with more steps. if it returns DADD, something's wrong - -Emulator exit codes: -- <0 = OK -- 0 = never happens -- 1 - invalid opcode -- 2 - illegal instruction argument (like funct3) - -To compile stuff: -0. Get the toolchain obviously -1. riscv32-unknown-elf-gcc -ffreestanding -nostdlib -nostartfiles -Wl,--no-relax -Ttext=0x0 -e main -O0 -o program.elf program.c -2. riscv32-unknown-elf-objcopy -O binary program.elf program.bin -3. 
program.bin is the binary file with the program, pass it as an argument - -rv32i, ilp32 compatible toolchain for 64bit Linux: https://lfs.m724.eu/toolchain.tar.zst adaa74f263dcba430da588b1109bc3b90bd90a84c67b06213bd03a7bbacd1a2a -Or just the stuff necessary to make a binary file: https://lfs.m724.eu/toolchainlite.tar.zst 55e79dff7ba4093dedb8151461508fc157525ad89615d49d737845af03d1643f -Those were compiled with `./configure --prefix=$(pwd)/../toolchain --with-arch=rv32i --with-abi=ilp32` and `make` \ No newline at end of file diff --git a/include/elf_program_loader.h b/include/elf_program_loader.h new file mode 100644 index 0000000..6281be3 --- /dev/null +++ b/include/elf_program_loader.h @@ -0,0 +1,8 @@ +#ifndef ELF_PROGRAM_LOADER_H +#define ELF_PROGRAM_LOADER_H + +#include "cpu.h" + +int load_elf_to_cpu_and_rom(const char filename[], CPU *cpu); + +#endif \ No newline at end of file diff --git a/programs/return.elf b/programs/return.elf new file mode 100755 index 0000000000000000000000000000000000000000..3875de5b779b0293087859be59e8a0dd44c89cf3 GIT binary patch literal 4860 zcmeHL%Syvg5S?pUi*BqILC{5|#TOE6Ehy@yFN+|Gv%vOVhtkV`c9b4%$b=yPO>@|#ocYj7 zBfhX0Pi54!x&qm0yr*T@wKgzP2}04Z)m}{-=~n9Zhf!RGz#mdz$j>QJ@bEMUszdPB z8>PfI29uaCPq{!8`}0)y#e*nruLbISpx$>^9bIAq=PQAJEl{5ZYOoEnRoB%TBs>0F zbADWJR<$~BHI0VQtt4<>E@UfNo-3Ai_Bh8nS1$A7Uct$$w#{v!SprPWXkhdEtbU{k z4Y=MwwM4bgO`%mW3d@ZrUMBR80JYXMO}&AM^F>rm2+OFR3El3$=pO=oPvV_)7{`!L z(rr8=eTQroo#s}kKqKDA+|)T!k9-N8v?2$@6KO`CRzw)hkWn1`0}gTS2{F=(=vnB# d2*C(CUeC96{Og*2^>sb^83*U?5!m`+?i0PlK= diff --git a/programs/return2.elf b/programs/return2.elf new file mode 100755 index 0000000000000000000000000000000000000000..a6459430ebe561a89b40aaac46e96f6e57ea1605 GIT binary patch literal 4840 zcmeHLze`(D6#i~rV(cKAJ`mADQk%z`i;v&-4p-caS7-JepOnA8!os5XM3K{$d zTr)YuQ9*F+)~!=Z=F%ZciT%!dFV84aO6PkX=bm5RJ@>xvZfF1Phj&UT3?+hJkf=e; zQ@GHFkl!k3>XFs1BL;^fMLKeU>GnA{3ippQiGI}Kx+N~c2q@DnX`fQ>Uz+PcfJ*8 z`sm&O?c?4=9hd0##Q)l{kK#B?UMVYmptwI$ZUMBnsSNw1y*^6z2RINXbrs<5op;K6 
z_gbyr-nlQY2Y!sc9gpFV_c$;2@z{Cq*VdMk+1JU%ypwYlusX*+Zlzh=fs%#v-G?h~-Z!LWG4#=!;|V^X3^rHj1COb`Y=AJ|%jZR{U%b&(b!6oYbL9 zP3;ts-VN`#l|!{&MXgZT2At+zO&2z3wd>nYLr<4>HJoC-Rx54O@&4E;Xzf-uKC>$i z`9~4H +#include +#include +#include +#include + +#include "cpu.h" + +static inline int process_elf(Elf *elf, CPU *cpu) { + AddressSpace *addressSpace = cpu->addressSpace; + + if (elf_kind(elf) != ELF_K_ELF) { + fprintf(stderr, "File is not a valid ELF file\n"); + return 1; + } + + GElf_Ehdr ehdr; + if (gelf_getehdr(elf, &ehdr) == NULL) { + fprintf(stderr, "gelf_getehdr() failed: %s\n", elf_errmsg(-1)); + return 1; + } + + if (ehdr.e_type != ET_EXEC) { + printf("ELF is not an executable, proceeding anyway.\n"); + } + + if (ehdr.e_machine != EM_RISCV) { + fprintf(stderr, "ELF is not for RISC-V. Machine: 0x%X\n", ehdr.e_machine); + return 1; + } + + Elf_Scn *scn = NULL; + GElf_Shdr shdr; + + int warnedArch = 0; + + size_t shdrstrndx; // Section HeaDeR STRing table iNDeX + if (elf_getshdrstrndx(elf, &shdrstrndx) != 0) { + fprintf(stderr, "elf_getshdrstrndx error: %s\n", elf_errmsg(-1)); + return 1; + } + + while ((scn = elf_nextscn(elf, scn)) != NULL) { + if (gelf_getshdr(scn, &shdr) != &shdr) { + fprintf(stderr, "getshdr() failed: %s\n", elf_errmsg(-1)); + continue; + } + + char *sectionName = elf_strptr(elf, shdrstrndx, shdr.sh_name); + + if (shdr.sh_type == SHT_RISCV_ATTRIBUTES) { + Elf_Data *data = elf_getdata(scn, NULL); + + if (data == NULL || data->d_size == 0) { + fprintf(stderr, "elf_getdata() failed: %s\n", elf_errmsg(-1)); + continue; + } + + // TODO this is highly fragile + // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc + char *content = (char *)data->d_buf; + content += 19; // first 17 chars are subsection headers, and I have to +2 to make strcmp match + if (strcmp(content, "rv32i2p1") == 0) { + warnedArch = 1; + } + + break; + } else if (strcmp(sectionName, ".text") == 0) { + Elf_Data *data = elf_getdata(scn, NULL); + + if (data == NULL || 
data->d_size == 0) { + fprintf(stderr, "elf_getdata() failed: %s\n", elf_errmsg(-1)); + continue; + } + + char *content = (char *)data->d_buf; + uint32_t contentSize = data->d_size; + + if (contentSize > addressSpace->romSize) { + fprintf(stderr, "Code has %d bytes, and doesn't fit in ROM of capacity %d bytes\n", contentSize, addressSpace->romSize); + return 1; + } + + memcpy(addressSpace->rom, content, contentSize); + + printf("Loaded %d bytes or %d instructions from an ELF file\n", contentSize, contentSize / 4); + } else { + printf("Unrecognized section: %s\n", sectionName); + } + } + + if (warnedArch == 0) { + printf("I couldn't verify whether the ELF was compiled for the correct instruction set!\n"); + } + + cpu->programCounter = ehdr.e_entry; + + return 0; +} + +int load_elf_to_cpu_and_rom(const char filename[], CPU *cpu) { + if (elf_version(EV_CURRENT) == EV_NONE) { + fprintf(stderr, "Invalid ELF version: %s\n", elf_errmsg(-1)); // TODO can perror be used? + return 1; + } + + int code = 0; + + int fd = open(filename, O_RDONLY, 0); + if (fd < 0) { + perror("Failed opening file"); + return 1; + } + + Elf *elf = elf_begin(fd, ELF_C_READ, NULL); + if (elf == NULL) { + fprintf(stderr, "Error reading ELF file: %s\n", elf_errmsg(-1)); + if (close(fd) < 0) { + perror("Also failed closing file"); + } + return 1; + } + + if ((code = process_elf(elf, cpu)) != 0) { + fprintf(stderr, "Failed processing ELF\n"); + } + + if (elf_end(elf) < 0) { + fprintf(stderr, "Failed closing ELF: %s\n", elf_errmsg(-1)); // TODO can perror be used? 
+ } + + if (close(fd) < 0) { + perror("Failed closing file"); // TODO perhaps make one close for all the errors above + } + + return code; +} \ No newline at end of file diff --git a/src/instruction_executor.c b/src/instruction_executor.c index a3f0b55..3b90e66 100644 --- a/src/instruction_executor.c +++ b/src/instruction_executor.c @@ -255,7 +255,7 @@ int execute_instruction_on_cpu(CPU *cpu, uint32_t instruction) { // TODO conside } default: { // TODO illegal instruction, proper error handling - fprintf(stderr, "Unrecognized opcode!"); + fprintf(stderr, "Unrecognized opcode: 0x%X", opcode); return 1; break; } diff --git a/src/main.c b/src/main.c index 23b251a..ce8285c 100644 --- a/src/main.c +++ b/src/main.c @@ -2,8 +2,10 @@ #include #include #include +#include #include "program_loader.h" +#include "elf_program_loader.h" #include "cpu.h" int main(int argc, char *argv[]) { @@ -15,16 +17,23 @@ int main(int argc, char *argv[]) { AddressSpace *addressSpace = create_address_space(256, 256); printf("Address space: %dB ROM, %dB RAM\n", addressSpace->romSize, addressSpace->ramSize); - if (load_to_rom(argv[1], addressSpace) != 0) { + CPU cpu = create_cpu(addressSpace); + cpu.registers[1] = addressSpace->romSize + addressSpace->ramSize; // make jumping to x1 end the program + + + int lres = load_to_rom(argv[1], addressSpace); + + if (lres == -1) { + lres = load_elf_to_cpu_and_rom(argv[1], &cpu); + } + + if (lres != 0) { fprintf(stderr, "Error loading program\n"); return 1; } - CPU cpu = create_cpu(addressSpace); - cpu.registers[1] = addressSpace->romSize + addressSpace->ramSize; // make jumping to x1 end the program - cpu.programCounter = 0x70; - printf("\n----- Start of program -----"); + printf("\n----- Start of program (0x%X) -----", cpu.programCounter); int code; uint32_t cycles = 0; @@ -33,12 +42,14 @@ int main(int argc, char *argv[]) { //sleep(1); } - printf("\n\n----- End of program -----\n"); + printf("\n\n----- End of program (0x%X) -----\n", cpu.programCounter); 
// The four identification bytes that open every ELF file: 0x7F 'E' 'L' 'F'.
static const unsigned char elf_magic[] = {0x7F, 0x45, 0x4c, 0x46};

/*
 * Reports whether the stream starts with the ELF magic number.
 *
 * The stream is always repositioned to its beginning afterwards, so the
 * caller can read the file from the start whatever the answer is.
 *
 * file: stream opened for reading
 *
 * Returns true only when the first four bytes were read in full and match
 * the magic; a file shorter than four bytes is never an ELF.
 */
static inline bool check_elf(FILE *file) { // TODO maybe move
    unsigned char header[sizeof elf_magic];

    // One item of sizeof(header) bytes: the result is 1 only on a full read.
    size_t itemsRead = fread(header, sizeof header, 1, file);

    // Rewind before deciding; this also clears the end-of-file indicator a
    // short read may have set.
    fseek(file, 0, SEEK_SET);

    if (itemsRead != 1) {
        return false; // do not compare a partially filled buffer
    }

    return memcmp(header, elf_magic, sizeof elf_magic) == 0;
}
file\n", filename, fileLen, fileLen / 4); return 0; } \ No newline at end of file