Skip to content

Commit

Permalink
feat: make downloading episodes more robust - detect file extensions …
Browse files Browse the repository at this point in the history
…from the file contents

We use the first few bytes of the downloaded audio blob (its "magic number") to determine the actual
file type. This prevents PodNotes from simply failing when a podcast provider doesn't include a file
extension in the URL and only reports the MIME type as `application/octet-stream`.
  • Loading branch information
chhoumann committed Mar 10, 2024
1 parent 83f3dd9 commit 352bdda
Showing 1 changed file with 92 additions and 13 deletions.
105 changes: 92 additions & 13 deletions src/downloadEpisode.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@ async function downloadFile(

return {
blob: new Blob([response.arrayBuffer], {
type: response.headers["content-type"] ?? response.headers["Content-Type"] ?? "",
type:
response.headers["content-type"] ??
response.headers["Content-Type"] ??
"",
}),
contentLength,
receivedLength: contentLength,
Expand All @@ -33,7 +36,7 @@ async function downloadFile(
} catch (error) {
const err = new Error(`Failed to download ${url}:\n\n${error.message}`);
options?.onError?.(err);

throw err;
}
}
Expand All @@ -52,20 +55,24 @@ export default async function downloadEpisodeWithNotice(
bodyEl.createEl("p", { text: "Downloading..." });
});

const { blob, responseUrl } = await downloadFile(episode.streamUrl, {
const { blob } = await downloadFile(episode.streamUrl, {
onFinished: () => {
update((bodyEl) =>
bodyEl.createEl("p", { text: "Download complete!" })
);
},
onError: (error) => {
update((bodyEl) =>
bodyEl.createEl("p", { text: `Download failed: ${error.message}` })
bodyEl.createEl("p", {
text: `Download failed: ${error.message}`,
})
);
}
},
});

if (!blob.type.contains("audio")) {
const fileExtension = await detectAudioFileExtension(blob);

if (!blob.type.contains("audio") || !fileExtension) {
update((bodyEl) => {
bodyEl.createEl("p", {
text: `Downloaded file is not an audio file. It is of type "${blob.type}". Blob: ${blob.size} bytes.`,
Expand All @@ -76,13 +83,13 @@ export default async function downloadEpisodeWithNotice(
}

try {
update((bodyEl) => bodyEl.createEl("p", { text: `Creating file...` }));
update((bodyEl) => bodyEl.createEl("p", { text: "Creating file..." }));

await createEpisodeFile({
episode,
downloadPathTemplate,
blob,
responseUrl,
extension: fileExtension,
});

update((bodyEl) =>
Expand Down Expand Up @@ -134,15 +141,15 @@ async function createEpisodeFile({
episode,
downloadPathTemplate,
blob,
responseUrl,
extension,
}: {
episode: Episode;
downloadPathTemplate: string;
blob: Blob;
responseUrl: string;
extension: string;
}) {
const basename = DownloadPathTemplateEngine(downloadPathTemplate, episode);
const filePath = `${basename}.${getUrlExtension(responseUrl)}`;
const filePath = `${basename}.${extension}`;

const buffer = await blob.arrayBuffer();

Expand All @@ -162,19 +169,91 @@ export async function downloadEpisode(
try {
const { blob, responseUrl } = await downloadFile(episode.streamUrl);

if (!blob.type.contains("audio")) {
const fileExtension = getUrlExtension(responseUrl);

if (!blob.type.contains("audio") || !fileExtension) {
throw new Error("Not an audio file.");
}

await createEpisodeFile({
episode,
downloadPathTemplate,
blob,
responseUrl,
extension: fileExtension,
});
} catch (error) {
throw new Error(
`Failed to download ${episode.title}: ${error.message}`
);
}
}

/**
 * Magic-byte description of one audio file format.
 */
interface AudioSignature {
	/** Expected bytes, aligned with the first byte of the file. */
	signature: number[];
	/**
	 * Optional per-byte bit mask applied to both the file byte and the
	 * expected byte before comparing. `0x00` means "ignore this byte";
	 * a missing mask (or missing mask entry) means the byte must match exactly.
	 */
	mask?: number[];
	/** Extension (without the leading dot) to use when the signature matches. */
	fileExtension: string;
}

/**
 * Known audio signatures, checked in order; the first match wins.
 */
const AUDIO_SIGNATURES: AudioSignature[] = [
	// MPEG audio frame sync: the first 11 bits of the header are all set.
	{ signature: [0xff, 0xe0], mask: [0xff, 0xe0], fileExtension: "mp3" },
	// "ID3" - an ID3v2 tag placed in front of the MPEG frames.
	{ signature: [0x49, 0x44, 0x33], fileExtension: "mp3" },
	// "RIFF" <4-byte chunk size> "WAVE". RIFF alone is not enough: AVI and
	// WebP are RIFF containers too, so the form type must be checked as well.
	{
		signature: [
			0x52, 0x49, 0x46, 0x46, 0x00, 0x00, 0x00, 0x00, 0x57, 0x41, 0x56,
			0x45,
		],
		mask: [
			0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff,
			0xff,
		],
		fileExtension: "wav",
	},
	// "OggS"
	{ signature: [0x4f, 0x67, 0x67, 0x53], fileExtension: "ogg" },
	// "fLaC"
	{ signature: [0x66, 0x4c, 0x61, 0x43], fileExtension: "flac" },
	// <4-byte box size> "ftyp" "M4A ". The brand lives at byte 8, after the
	// box size and the "ftyp" box type - it is never at the start of the file.
	// Only the "M4A " major brand is recognised here.
	{
		signature: [
			0x00, 0x00, 0x00, 0x00, 0x66, 0x74, 0x79, 0x70, 0x4d, 0x34, 0x41,
			0x20,
		],
		mask: [
			0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
			0xff,
		],
		fileExtension: "m4a",
	},
	// ASF header object GUID (first 8 bytes), used by WMA.
	{
		signature: [0x30, 0x26, 0xb2, 0x75, 0x8e, 0x66, 0xcf, 0x11],
		fileExtension: "wma",
	},
	// "#!AMR\n"
	{
		signature: [0x23, 0x21, 0x41, 0x4d, 0x52, 0x0a],
		fileExtension: "amr",
	},
];

/** Number of leading bytes needed to test every entry in AUDIO_SIGNATURES. */
const MAX_AUDIO_SIGNATURE_LENGTH = Math.max(
	...AUDIO_SIGNATURES.map((sig) => sig.signature.length)
);

/**
 * Returns true when `bytes` starts with the given signature, honouring its mask.
 */
function matchesAudioSignature(
	bytes: Uint8Array,
	{ signature, mask }: AudioSignature
): boolean {
	// A file shorter than the signature can never match; checking up front
	// avoids comparing against `undefined` bytes past the end of the buffer.
	if (bytes.length < signature.length) {
		return false;
	}

	return signature.every((expected, i) => {
		const bits = mask?.[i] ?? 0xff;
		return ((bytes[i] ?? 0) & bits) === (expected & bits);
	});
}

/**
 * Detects the audio format of a blob from its leading bytes.
 *
 * Only the first few bytes of the blob are read, so this is cheap even for
 * large downloads.
 *
 * @param blob - The downloaded file contents.
 * @returns The file extension (without a leading dot) of the first matching
 *   known audio format, or `null` if the content matches none of them.
 */
async function detectAudioFileExtension(blob: Blob): Promise<string | null> {
	const header = new Uint8Array(
		await blob.slice(0, MAX_AUDIO_SIGNATURE_LENGTH).arrayBuffer()
	);

	const match = AUDIO_SIGNATURES.find((candidate) =>
		matchesAudioSignature(header, candidate)
	);

	return match?.fileExtension ?? null;
}

0 comments on commit 352bdda

Please sign in to comment.