Skip to content

Commit

Permalink
Merge pull request #30 from jmattaa/feat/file-extensions
Browse files Browse the repository at this point in the history
feat: add file extensions to filetype detection
  • Loading branch information
jmattaa authored Nov 13, 2024
2 parents 8bd5549 + f4bdad2 commit 0dc8bb7
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 88 deletions.
23 changes: 16 additions & 7 deletions include/filetypes/checktypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,27 @@

#include <stddef.h>

struct laser_magicnumber
// https://en.wikipedia.org/wiki/List_of_file_signatures
// Handy reference when looking up the magic number of a format.

// Describes one file format: a magic-number byte sequence plus the filename
// extensions associated with the format. The format tables initialise this
// struct positionally, so the field order below must not change.
struct laser_filetype
{
unsigned char *magic; // magic-number bytes for the format
size_t magic_size; // number of bytes in `magic`

const char **extensions; // extension strings, stored without the leading '.'
size_t extensions_count; // number of entries in `extensions`
};

int laser_checktype(const char *filename,
const struct laser_magicnumber formats[]);
int laser_checktype_ex(int fd, const char *filename,
const struct laser_magicnumber formats[]);
extern const struct laser_magicnumber laser_archiveformats[];
extern const struct laser_magicnumber laser_mediaformats[];
extern const struct laser_magicnumber laser_documentformats[];
const struct laser_filetype formats[]);

int laser_checktype_extension(const char *filename,
const struct laser_filetype formats[]);
int laser_checktype_magic(int fd, const struct laser_filetype formats[]);

extern const struct laser_filetype laser_archiveformats[];
extern const struct laser_filetype laser_mediaformats[];
extern const struct laser_filetype laser_documentformats[];

#endif
34 changes: 23 additions & 11 deletions src/filetypes/archives.c
Original file line number Diff line number Diff line change
@@ -1,14 +1,26 @@
#include "filetypes/checktypes.h"

// Table of archive formats. Two-field entries hold {magic bytes, length};
// four-field entries additionally hold {extension list, extension count}.
// An entry with NULL pointers and zero counts terminates the table.
// NOTE(review): this span carries both the two-field (laser_magicnumber) and
// the four-field (laser_filetype) initializer for the same symbol, each with
// its own terminator -- confirm that only the intended one is kept.
const struct laser_magicnumber laser_archiveformats[] = {
{(unsigned char[]){0x50, 0x4B, 0x03, 0x04}, 4}, // ZIP and stuff based on it
{(unsigned char[]){0x1F, 0x8B}, 2}, // GZIP
{(unsigned char[]){'u', 's', 't', 'a', 'r'}, 5}, // TAR
{(unsigned char[]){0x42, 0x5A, 0x68}, 3}, // BZIP2
{(unsigned char[]){0xFD, 0x37, 0x7A, 0x58, 0x5A}, 5}, // XZ
{(unsigned char[]){0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00},
7}, // RAR v1.5 to 4.0
{(unsigned char[]){0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x01, 0x00},
8}, // RAR v5+
const struct laser_filetype laser_archiveformats[] = {

// NOTE(review): docx/xlsx/pptx/odt/ods/odp/epub are listed below, so by
// extension they match this archive table -- confirm that is intended rather
// than treating them as documents.
{
(unsigned char[]){0x50, 0x4B, 0x03, 0x04},
4,
(const char *[]){"zip", "aar", "apk", "docx", "epub", "ipa", "jar",
"kmz", "maff", "msix", "odp", "ods", "odt", "pk3",
"pk4", "pptx", "usdz", "vsdx", "xlsx", "xpi"},
20,
}, // ZIP and stuff based on it
{(unsigned char[]){0x1F, 0x8B}, 2, (const char *[]){"gz"}, 1}, // GZIP
// NOTE(review): the "ustar" signature is usually found at byte offset 257 of
// a tar file, not at offset 0 -- confirm the magic check can ever match it.
{(unsigned char[]){'u', 's', 't', 'a', 'r'}, 5, (const char *[]){"tar"},
1}, // TAR
{(unsigned char[]){0x42, 0x5A, 0x68}, 3, (const char *[]){"bz2"},
1}, // BZIP2
{(unsigned char[]){0xFD, 0x37, 0x7A, 0x58, 0x5A}, 5, (const char *[]){"xz"},
1}, // XZ
{(unsigned char[]){0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00}, 7,
(const char *[]){"rar"}, 1}, // RAR v1.5 to 4.0
{(unsigned char[]){0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x01, 0x00}, 8,
(const char *[]){"rar"}, 1}, // RAR v5+
// ADD MORE ARCHIVES
{NULL, 0}};

{NULL, 0, NULL, 0}};
105 changes: 55 additions & 50 deletions src/filetypes/checktype.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,68 @@ enum check_type_return
CannotRead,
Unknown
};
static int laser_checktype_fd(int fd, const struct laser_magicnumber formats[])

// Reports whether `filename` belongs to one of the formats in `formats`.
//
// The extension is tested first via laser_checktype_extension(), which needs
// no file I/O. Only when that does not match is the file opened read-only and
// handed to laser_checktype_magic().
//
// Returns 1 on a match and 0 otherwise. A file that cannot be opened or read
// also yields 0, after a diagnostic has been written to stderr.
int laser_checktype(const char *filename, const struct laser_filetype formats[])
{
    if (laser_checktype_extension(filename, formats) == Success)
        return 1;

    int fd = open(filename, O_RDONLY);
    if (fd == -1)
    {
        perror("open");
        return 0;
    }

    int rv = laser_checktype_magic(fd, formats);
    switch (rv)
    {
    case NotOpened:
        perror("open");
        break;
    case CannotRead:
        // Terminate the diagnostic with a newline so that whatever is printed
        // next does not end up glued onto the same line.
        fprintf(stderr, "lsr: cannot read %s\n", filename);
        break;
    default:
        break;
    }

    close(fd);
    return rv == Success;
}

// Checks whether the extension of `filename` (the text after its last '.')
// is listed in any entry of `formats`.
//
// `formats` is scanned until an entry whose `extensions` pointer is NULL.
// The comparison is case-sensitive (strcmp).
//
// Returns Success when an extension matches, Unknown otherwise -- including
// when `filename` is NULL or contains no '.' at all.
int laser_checktype_extension(const char *filename,
                              const struct laser_filetype *formats)
{
    if (filename == NULL)
        return Unknown;

    // strrchr() yields NULL for a name without any '.'; offsetting that
    // pointer and handing it to strcmp() would be undefined behaviour, so
    // treat such names as having no extension.
    const char *dot = strrchr(filename, '.');
    if (dot == NULL)
        return Unknown;

    const char *ext = dot + 1; // skip the '.' itself

    for (size_t i = 0; formats[i].extensions != NULL; i++)
    {
        for (size_t j = 0; j < formats[i].extensions_count; j++)
        {
            if (strcmp(ext, formats[i].extensions[j]) == 0)
                return Success;
        }
    }

    return Unknown;
}

int laser_checktype_magic(int fd, const struct laser_filetype formats[])
{

if (fd == -1)
{
perror("open");
return 0;
}

off_t curr_pos = lseek(fd, 0, SEEK_CUR);
if (fd == -1)
{
return NotOpened;
}
lseek(fd, 0, SEEK_SET); // seek to start

// ----------------------------------------------------------------------
// should be updated if there is a magic number larger than 8 bytes
unsigned char buffer[8];

Expand All @@ -46,52 +100,3 @@ static int laser_checktype_fd(int fd, const struct laser_magicnumber formats[])
lseek(fd, curr_pos, SEEK_SET);
return Unknown;
}

// Opens `filename` read-only and checks it against `formats` through
// laser_checktype_fd().
//
// Returns 1 when laser_checktype_fd() reports Success and 0 otherwise. A file
// that cannot be opened or read also yields 0, after a diagnostic has been
// written to stderr. The descriptor is closed before returning.
int laser_checktype(const char *filename,
                    const struct laser_magicnumber formats[])
{
    int fd = open(filename, O_RDONLY);
    if (fd == -1)
    {
        perror("open");
        return 0;
    }

    int rv = laser_checktype_fd(fd, formats);
    switch (rv)
    {
    case NotOpened:
        perror("open");
        break;
    case CannotRead:
        // Newline-terminated so the next message starts on its own line.
        fprintf(stderr, "laser: cannot read %s\n", filename);
        break;
    default:
        break;
    }

    close(fd);
    return rv == Success;
}

// Variant of laser_checktype() for callers that already hold an open
// descriptor: checks `fd` against `formats` through laser_checktype_fd().
// `filename` is used only for the diagnostic message.
//
// Returns 1 when laser_checktype_fd() reports Success and 0 otherwise; an
// `fd` of -1 yields 0 after perror(). The descriptor is NOT closed here, it
// stays owned by the caller.
int laser_checktype_ex(int fd, const char *filename,
                       const struct laser_magicnumber formats[])
{
    if (fd == -1)
    {
        perror("open");
        return 0;
    }

    int rv = laser_checktype_fd(fd, formats);
    switch (rv)
    {
    case NotOpened:
        perror("open");
        break;
    case CannotRead:
        // Newline-terminated so the next message starts on its own line.
        fprintf(stderr, "laser: cannot read %s\n", filename);
        break;
    default:
        break;
    }

    return rv == Success;
}
17 changes: 10 additions & 7 deletions src/filetypes/documents.c
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
#include "filetypes/checktypes.h"

// Table of document formats. Two-field entries hold {magic bytes, length};
// four-field entries additionally hold {extension list, extension count}.
// An entry with NULL pointers and zero counts terminates the table.
// NOTE(review): this span carries both the two-field (laser_magicnumber) and
// the four-field (laser_filetype) initializer for the same symbol, each with
// its own terminator -- confirm that only the intended one is kept.
const struct laser_magicnumber laser_documentformats[] = {
{(unsigned char[]){'%', 'P', 'D', 'F'}, 4}, // PDF
{(unsigned char[]){0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1},
8}, // compound bin file from microsoft (doc, xls, ppt, msi, msg)
{(unsigned char[]){0x41, 0x54, 0x26, 0x54, 0x46, 0x4F, 0x52, 0x4D},
8}, // DJVU
const struct laser_filetype laser_documentformats[] = {
{(unsigned char[]){'%', 'P', 'D', 'F'}, 4, (const char *[]){"pdf"},
1}, // PDF
{(unsigned char[]){0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1}, 8,
(const char *[]){"doc", "xls", "ppt", "msi", "msg"},
5}, // compound bin file from microsoft
{(unsigned char[]){0x41, 0x54, 0x26, 0x54, 0x46, 0x4F, 0x52, 0x4D}, 8,
(const char *[]){"djvu", "djv"}, 2}, // DJVU
// ADD DOCUMENT TYPES
{NULL, 0}};

{NULL, 0, NULL, 0}};
20 changes: 12 additions & 8 deletions src/filetypes/medias.c
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
#include "filetypes/checktypes.h"

// Table of media formats. Two-field entries hold {magic bytes, length};
// four-field entries additionally hold {extension list, extension count}.
// An entry with NULL pointers and zero counts terminates the table.
// NOTE(review): this span carries both the two-field (laser_magicnumber) and
// the four-field (laser_filetype) initializer for the same symbol, each with
// its own terminator -- confirm that only the intended one is kept.
const struct laser_magicnumber laser_mediaformats[] = {
{(unsigned char[]){0xFF, 0xD8, 0xFF, 0xE0}, 4}, // JPEG
{(unsigned char[]){0x89, 0x50, 0x4E, 0x47}, 4}, // PNG
{(unsigned char[]){0x47, 0x49, 0x46, 0x38}, 4}, // GIF
{(unsigned char[]){0x49, 0x44, 0x33}, 3}, // MP3
{(unsigned char[]){0x66, 0x74, 0x79, 0x70, 0x4D, 0x53, 0x4E, 0x56},
8}, // MP4
const struct laser_filetype laser_mediaformats[] = {
// NOTE(review): FF D8 FF E0 is the JFIF flavour of JPEG; files starting with
// FF D8 FF E1 / DB / EE would only be caught by extension -- confirm.
{(unsigned char[]){0xFF, 0xD8, 0xFF, 0xE0}, 4,
(const char *[]){"jpeg", "jpg"}, 2}, // JPEG
{(unsigned char[]){0x89, 0x50, 0x4E, 0x47}, 4, (const char *[]){"png"},
1}, // PNG
{(unsigned char[]){0x47, 0x49, 0x46, 0x38}, 4, (const char *[]){"gif"},
1}, // GIF
// NOTE(review): 49 44 33 is "ID3", i.e. an MP3 that begins with an ID3v2 tag;
// MP3 files without that tag would only be caught by extension -- confirm.
{(unsigned char[]){0x49, 0x44, 0x33}, 3, (const char *[]){"mp3"}, 1}, // MP3
// NOTE(review): these bytes spell "ftypMSNV"; in MP4 files "ftyp" normally
// sits at byte offset 4 and is followed by a brand that varies -- confirm
// this pattern can match at the offset the magic check reads from.
{(unsigned char[]){0x66, 0x74, 0x79, 0x70, 0x4D, 0x53, 0x4E, 0x56}, 8,
(const char *[]){"mp4"}, 1}, // MP4
// ADD MORE MEDIA TYPES
{NULL, 0}};

{NULL, 0, NULL, 0}};
8 changes: 3 additions & 5 deletions src/laser.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,15 +98,13 @@ void laser_print_entry(struct laser_dirent *entry, const char *color,

// Chooses the colour category for `filename`: the archive, media and document
// format tables are tried in that order, and LASER_COLOR_FILE is the fallback
// when none of them matches.
// NOTE(review): the body contains both the descriptor-based
// laser_checktype_ex() calls (with the matching open/close) and the
// filename-based laser_checktype() calls for every table -- confirm which set
// is intended; the doubled if / else-if lines cannot both stay.
static laser_color_type laser_color_for_format(const char *filename)
{
int fd = open(filename, O_RDONLY);
laser_color_type type = LASER_COLOR_FILE;
if (laser_checktype_ex(fd, filename, laser_archiveformats))
if (laser_checktype(filename, laser_archiveformats))
type = LASER_COLOR_ARCHIVE;
else if (laser_checktype_ex(fd, filename, laser_mediaformats))
else if (laser_checktype(filename, laser_mediaformats))
type = LASER_COLOR_MEDIA;
else if (laser_checktype_ex(fd, filename, laser_documentformats))
else if (laser_checktype(filename, laser_documentformats))
type = LASER_COLOR_DOCUMENT;
close(fd);
return type;
}

Expand Down

0 comments on commit 0dc8bb7

Please sign in to comment.