From 95ff59ef0d4096de588a9589914f63e38bdf4a78 Mon Sep 17 00:00:00 2001 From: jmattaa Date: Wed, 13 Nov 2024 16:49:08 +0100 Subject: [PATCH 1/2] feat: add file extensions to filetype detection Signed-off-by: jmattaa --- include/filetypes/checktypes.h | 23 +++++--- src/filetypes/archives.c | 34 +++++++---- src/filetypes/checktype.c | 105 +++++++++++++++++---------------- src/filetypes/documents.c | 17 +++--- src/filetypes/medias.c | 20 ++++--- src/laser.c | 8 +-- 6 files changed, 119 insertions(+), 88 deletions(-) diff --git a/include/filetypes/checktypes.h b/include/filetypes/checktypes.h index 4849829..d871b3f 100644 --- a/include/filetypes/checktypes.h +++ b/include/filetypes/checktypes.h @@ -3,18 +3,27 @@ #include -struct laser_magicnumber +// https://en.wikipedia.org/wiki/List_of_file_signatures +// dis be good sometimes + +struct laser_filetype { unsigned char *magic; size_t magic_size; + + const char **extensions; + size_t extensions_count; }; int laser_checktype(const char *filename, - const struct laser_magicnumber formats[]); -int laser_checktype_ex(int fd, const char *filename, - const struct laser_magicnumber formats[]); -extern const struct laser_magicnumber laser_archiveformats[]; -extern const struct laser_magicnumber laser_mediaformats[]; -extern const struct laser_magicnumber laser_documentformats[]; + const struct laser_filetype formats[]); + +int laser_checktype_extension(const char *filename, + const struct laser_filetype formats[]); +int laser_checktype_magic(int fd, const struct laser_filetype formats[]); + +extern const struct laser_filetype laser_archiveformats[]; +extern const struct laser_filetype laser_mediaformats[]; +extern const struct laser_filetype laser_documentformats[]; #endif diff --git a/src/filetypes/archives.c b/src/filetypes/archives.c index c0308f7..cc7d7c3 100644 --- a/src/filetypes/archives.c +++ b/src/filetypes/archives.c @@ -1,14 +1,26 @@ #include "filetypes/checktypes.h" -const struct laser_magicnumber laser_archiveformats[] = { - {(unsigned char[]){0x50, 0x4B, 0x03, 0x04}, 4}, // ZIP and stuff based on it - {(unsigned char[]){0x1F, 0x8B}, 2}, // GZIP - {(unsigned char[]){'u', 's', 't', 'a', 'r'}, 5}, // TAR - {(unsigned char[]){0x42, 0x5A, 0x68}, 3}, // BZIP2 - {(unsigned char[]){0xFD, 0x37, 0x7A, 0x58, 0x5A}, 5}, // XZ - {(unsigned char[]){0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00}, - 7}, // RAR v1.5 to 4.0 - {(unsigned char[]){0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x01, 0x00}, - 8}, // RAR v5+ +const struct laser_filetype laser_archiveformats[] = { + + { + (unsigned char[]){0x50, 0x4B, 0x03, 0x04}, + 4, + (const char *[]){"zip", "aar", "apk", "docx", "epub", "ipa", "jar", + "kmz", "maff", "msix", "odp", "ods", "odt", "pk3", + "pk4", "pptx", "usdz", "vsdx", "xlsx", "xpi"}, + 20, + }, // ZIP and stuff based on it + {(unsigned char[]){0x1F, 0x8B}, 2, (const char *[]){"gz"}, 1}, // GZIP + {(unsigned char[]){'u', 's', 't', 'a', 'r'}, 5, (const char *[]){"tar"}, + 1}, // TAR + {(unsigned char[]){0x42, 0x5A, 0x68}, 3, (const char *[]){"bz2"}, + 1}, // BZIP2 + {(unsigned char[]){0xFD, 0x37, 0x7A, 0x58, 0x5A}, 5, (const char *[]){"xz"}, + 1}, // XZ + {(unsigned char[]){0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00}, 7, + (const char *[]){"rar"}, 1}, // RAR v1.5 to 4.0 + {(unsigned char[]){0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x01, 0x00}, 8, + (const char *[]){"rar"}, 1}, // RAR v5+ // ADD MORE ARCHIVES - {NULL, 0}}; + + {NULL, 0, NULL, 0}}; diff --git a/src/filetypes/checktype.c b/src/filetypes/checktype.c index fabc7b5..6ce1f69 100644 --- a/src/filetypes/checktype.c +++ b/src/filetypes/checktype.c @@ -12,14 +12,68 @@ enum check_type_return CannotRead, Unknown }; -static int laser_checktype_fd(int fd, const struct laser_magicnumber formats[]) + +int laser_checktype(const char *filename, const struct laser_filetype formats[]) +{ + if (laser_checktype_extension(filename, formats) == Success) + return 1; + + int fd = open(filename, O_RDONLY); + if (fd == -1) + { + perror("open"); + return 0; + } + + int rv = laser_checktype_magic(fd, formats); + switch (rv) + { + case NotOpened: + perror("open"); + break; + case CannotRead: + fprintf(stderr, "lsr: cannot read %s", filename); + break; + default: + break; + } + close(fd); + return rv == Success; +} + +int laser_checktype_extension(const char *filename, + const struct laser_filetype *formats) +{ + int rv = Unknown; + for (int i = 0; formats[i].extensions != NULL; i++) + { + for (size_t j = 0; j < formats[i].extensions_count; j++) + { + char *ext = strrchr(filename, '.') + 1; // +1 skip . + if (strcmp(ext, formats[i].extensions[j]) == 0) + rv = Success; + } + } + return rv; +} + +int laser_checktype_magic(int fd, const struct laser_filetype formats[]) { + + if (fd == -1) + { + perror("open"); + return 0; + } + off_t curr_pos = lseek(fd, 0, SEEK_CUR); if (fd == -1) { return NotOpened; } lseek(fd, 0, SEEK_SET); // seek to start + + // ---------------------------------------------------------------------- // should be updated if there is a magic number larger than 8 bits unsigned char buffer[8]; @@ -46,52 +100,3 @@ static int laser_checktype_fd(int fd, const struct laser_magicnumber formats[]) lseek(fd, curr_pos, SEEK_SET); return Unknown; } - -int laser_checktype(const char *filename, - const struct laser_magicnumber formats[]) -{ - int fd = open(filename, O_RDONLY); - if (fd == -1) - { - perror("open"); - return 0; - } - int rv = laser_checktype_fd(fd, formats); - switch (rv) - { - case NotOpened: - perror("open"); - break; - case CannotRead: - fprintf(stderr, "laser: cannot read %s", filename); - break; - default: - break; - } - close(fd); - return rv == Success; -} - -int laser_checktype_ex(int fd, const char *filename, - const struct laser_magicnumber formats[]) -{ - - if (fd == -1) - { - perror("open"); - return 0; - } - int rv = laser_checktype_fd(fd, formats); - switch (rv) - { - case NotOpened: - perror("open"); - break; - case CannotRead: - fprintf(stderr, "laser: cannot read %s", filename); - break; - default: - break; - } - return rv == Success; -} diff --git a/src/filetypes/documents.c b/src/filetypes/documents.c index aa83867..b8714dc 100644 --- a/src/filetypes/documents.c +++ b/src/filetypes/documents.c @@ -1,10 +1,13 @@ #include "filetypes/checktypes.h" -const struct laser_magicnumber laser_documentformats[] = { - {(unsigned char[]){'%', 'P', 'D', 'F'}, 4}, // PDF - {(unsigned char[]){0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1}, - 8}, // compound bin file from microsoft (doc, xls, ppt, msi, msg) - {(unsigned char[]){0x41, 0x54, 0x26, 0x54, 0x46, 0x4F, 0x52, 0x4D}, - 8}, // DJVU +const struct laser_filetype laser_documentformats[] = { + {(unsigned char[]){'%', 'P', 'D', 'F'}, 4, (const char *[]){"pdf"}, + 1}, // PDF + {(unsigned char[]){0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1}, 8, + (const char *[]){"doc", "xls", "ppt", "msi", "msg"}, + 5}, // compound bin file from microsoft + {(unsigned char[]){0x41, 0x54, 0x26, 0x54, 0x46, 0x4F, 0x52, 0x4D}, 8, + (const char *[]){"djvu", "djv"}, 2}, // DJVU // ADD DOCUMENT TYPES - {NULL, 0}}; + + {NULL, 0, NULL, 0}}; diff --git a/src/filetypes/medias.c b/src/filetypes/medias.c index 9fc25ba..994e69e 100644 --- a/src/filetypes/medias.c +++ b/src/filetypes/medias.c @@ -1,11 +1,15 @@ #include "filetypes/checktypes.h" -const struct laser_magicnumber laser_mediaformats[] = { - {(unsigned char[]){0xFF, 0xD8, 0xFF, 0xE0}, 4}, // JPEG - {(unsigned char[]){0x89, 0x50, 0x4E, 0x47}, 4}, // PNG - {(unsigned char[]){0x47, 0x49, 0x46, 0x38}, 4}, // GIF - {(unsigned char[]){0x49, 0x44, 0x33}, 3}, // MP3 - {(unsigned char[]){0x66, 0x74, 0x79, 0x70, 0x4D, 0x53, 0x4E, 0x56}, - 8}, // MP4 +const struct laser_filetype laser_mediaformats[] = { + {(unsigned char[]){0xFF, 0xD8, 0xFF, 0xE0}, 4, + (const char *[]){"jpeg", "jpg"}, 2}, // JPEG + {(unsigned char[]){0x89, 0x50, 0x4E, 0x47}, 4, (const char *[]){"png"}, + 1}, // PNG + {(unsigned char[]){0x47, 0x49, 0x46, 0x38}, 4, (const char *[]){"gif"}, + 1}, // GIF + {(unsigned char[]){0x49, 0x44, 0x33}, 3, (const char *[]){"mp3"}, 1}, // MP3 + {(unsigned char[]){0x66, 0x74, 0x79, 0x70, 0x4D, 0x53, 0x4E, 0x56}, 8, + (const char *[]){"mp4"}, 1}, // MP4 // ADD MORE MEDIA TYPES - {NULL, 0}}; + + {NULL, 0, NULL, 0}}; diff --git a/src/laser.c b/src/laser.c index 02bf18b..980d0f6 100644 --- a/src/laser.c +++ b/src/laser.c @@ -98,15 +98,13 @@ void laser_print_entry(struct laser_dirent *entry, const char *color, static laser_color_type laser_color_for_format(const char *filename) { - int fd = open(filename, O_RDONLY); laser_color_type type = LASER_COLOR_FILE; - if (laser_checktype_ex(fd, filename, laser_archiveformats)) + if (laser_checktype(filename, laser_archiveformats)) type = LASER_COLOR_ARCHIVE; - else if (laser_checktype_ex(fd, filename, laser_mediaformats)) + else if (laser_checktype(filename, laser_mediaformats)) type = LASER_COLOR_MEDIA; - else if (laser_checktype_ex(fd, filename, laser_documentformats)) + else if (laser_checktype(filename, laser_documentformats)) type = LASER_COLOR_DOCUMENT; - close(fd); return type; } From f4bdad2a51f2a2898616f57291c5691e19ccf700 Mon Sep 17 00:00:00 2001 From: jmattaa Date: Wed, 13 Nov 2024 16:54:51 +0100 Subject: [PATCH 2/2] refactor: format code --- src/filetypes/checktype.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/filetypes/checktype.c b/src/filetypes/checktype.c index 6ce1f69..a4d9e57 100644 --- a/src/filetypes/checktype.c +++ b/src/filetypes/checktype.c @@ -47,11 +47,11 @@ int laser_checktype_extension(const char *filename, int rv = Unknown; for (int i = 0; formats[i].extensions != NULL; i++) { - for (size_t j = 0; j < formats[i].extensions_count; j++) + for (size_t j = 0; j < formats[i].extensions_count; j++) { char *ext = strrchr(filename, '.') + 1; // +1 skip . if (strcmp(ext, formats[i].extensions[j]) == 0) - rv = Success; + rv = Success; } } return rv;