yt-forager/cli.js

244 lines
9 KiB
JavaScript
Executable file

#!/usr/bin/env node
const lib = require("./lib");
const minimist = require("minimist");
const ora = require("ora");
const fs = require("fs");
// Start the CLI. `run` is a hoisted function declaration, so it can be called
// before its definition. `run` catches errors per URL itself; this handler only
// sees failures that happen outside that try/catch (e.g. while parsing argv).
run().catch((err) => {
  console.error(`\n[runner] Fatal error:`, err);
  process.exitCode = 1;
});
/**
 * CLI entry point.
 *
 * For every positional argument (a URL), walks the dates returned by
 * `lib.getCompatibleDates()`, asks `lib.fetchArchive` for an archive of the URL
 * at each date and dispatches on the `urlType` reported by
 * `lib.handleArchiveUrl`:
 *   - "channel": the parsed video list is merged into one de-duplicated list
 *     (optionally also looking up the channel's alternative URL); once all
 *     dates are processed every video is classified as available / archived /
 *     lost and the result is printed or written to the output file.
 *   - "video": the single video is checked and dumped, then the process exits.
 *
 * Recognised flags: `--output` / `-o` (write JSON to a file instead of stdout)
 * and `--avoid-alt-url` / `-a` (skip the alternative-URL lookup).
 *
 * Errors raised while handling one URL are logged and do not stop the
 * remaining URLs from being processed.
 *
 * @returns {Promise<void>}
 */
async function run() {
  // Skip the runtime (and, when started through node, the script path) in argv.
  const firstArgIndex = process.argv[0].includes("node") ? 2 : 1;
  const args = minimist(process.argv.slice(firstArgIndex));
  const output = args.output || args.o || null;
  const skipAltUrl = Boolean(args["avoid-alt-url"] || args["a"]);

  for (const url of args._) {
    console.log(`[runner] Fetching compatible archive for "${url}"...`);
    const videos = [];
    const spinner = ora("Placeholder");
    // Last archive that was parsed as a channel; an identical result for a
    // later date is skipped instead of being parsed again.
    let lastChannelArchive;
    // Sticky on purpose: an unrecognised archive for a later date must not
    // discard the videos already collected from earlier channel archives.
    let sawChannel = false;

    try {
      const compat = lib.getCompatibleDates();
      // Object.values keeps the original for...in semantics (array or keyed
      // object) while iterating values directly.
      for (const date of Object.values(compat ?? {})) {
        console.log(`[runner] Trying to find archive for "${url}" at`, date);
        const archive = await lib.fetchArchive(url, date);
        if (!archive || archive === lastChannelArchive) continue;

        const parsedArchive = lib.handleArchiveUrl(archive);
        const archiveType = parsedArchive?.urlType;
        if (archiveType === "channel") sawChannel = true;

        if (parsedArchive?.parser) {
          console.log(`[runner] Fetching and parsing archive for "${url}" at`, date);
          const parsed = await parsedArchive.parser.parse(parsedArchive.direct);
          if (archiveType === "channel") {
            mergeVideos(videos, parsed, date);
            lastChannelArchive = archive;
            await collectAltVideos(url, date, videos, skipAltUrl);
          } else if (archiveType === "video" && parsed?.title) {
            // Never returns: the single-video path ends with process.exit().
            await reportSingleVideo(parsed, spinner, output);
          }
        } else if (typeof archiveType === "string" && archiveType.startsWith("channel")) {
          // Channel-like archive without a parser: the alternative URL may
          // still lead to a parsable one.
          const altType = await collectAltVideos(url, date, videos, skipAltUrl);
          if (altType === "channel") sawChannel = true;
        }
      }

      if (sawChannel) await reportChannelVideos(videos, spinner, output);
    } catch (err) {
      if (spinner.isSpinning) spinner.stop();
      console.log(`\n[runner] Error:`, err);
    }
  }
}

/**
 * Appends the videos of a parsed channel archive to `videos`, skipping ids that
 * are already present, and tags each added video with the date it came from.
 *
 * @param {object[]} videos - Accumulated video list (mutated in place).
 * @param {{videos?: object}|null|undefined} parsed - Parser result; a missing
 *   result or missing `videos` is treated as empty.
 * @param {*} source - Archive date stored on each added video as `source`.
 */
function mergeVideos(videos, parsed, source) {
  for (const video of Object.values(parsed?.videos ?? {})) {
    if (video == null) continue;
    if (videos.some((known) => known.id === video.id)) continue;
    video.source = source;
    videos.push(video);
  }
}

/**
 * Looks up the alternative URL for `url` and, when it differs, merges the
 * videos found in its archive for `date` into `videos`.
 *
 * @param {string} url - Original URL.
 * @param {*} date - Archive date to query.
 * @param {object[]} videos - Accumulated video list (mutated in place).
 * @param {boolean} skipAltUrl - When true the lookup is skipped entirely.
 * @returns {Promise<string|undefined>} The `urlType` of the alternative
 *   archive, or undefined when there was nothing usable.
 */
async function collectAltVideos(url, date, videos, skipAltUrl) {
  if (skipAltUrl) return undefined;

  console.log(`[runner] Fetching alternative URL...`);
  const alt = await lib.altUrl(url);
  if (alt == null || alt === url) return undefined;

  console.log(`[runner] Trying to find archive for "${alt}" at`, date);
  const archive = await lib.fetchArchive(alt, date);
  if (!archive) return undefined;

  const parsedArchive = lib.handleArchiveUrl(archive);
  const altType = parsedArchive?.urlType;
  if (!altType) return undefined;

  console.log(`[runner] Fetching and parsing archive for "${alt}" at`, date);
  // The archive may be recognised without having a parser; then nothing is merged.
  const parsed = await parsedArchive.parser?.parse(parsedArchive.direct);
  mergeVideos(videos, parsed, date);
  return altType;
}

/**
 * Prints `payload` to stdout, or writes it as pretty-printed JSON to `output`
 * when an output path was given.
 *
 * @param {object} payload - Data to emit.
 * @param {string|null} output - Output file path, or null for stdout.
 */
function writeOrPrint(payload, output) {
  if (output == null) console.log(payload);
  else fs.writeFileSync(output, JSON.stringify(payload, null, 2));
}

/**
 * Handles a single-video archive: checks whether the video is still alive and,
 * if not, whether it is on Archive.org, then dumps the metadata.
 *
 * NOTE: this terminates the process via `process.exit()`, so URLs following a
 * video URL on the command line are not processed.
 *
 * @param {object} parsed - Parsed video metadata (reads `title` and `id`;
 *   `findable` / `thumbnail` are added when the video is no longer alive).
 * @param {object} spinner - ora spinner used for progress output.
 * @param {string|null} output - Output file path, or null for stdout.
 * @returns {Promise<never>}
 */
async function reportSingleVideo(parsed, spinner, output) {
  spinner.text = `Checking if "${parsed.title}" (${parsed.id}) is still alive...`;
  spinner.start();
  const alive = await lib.isAlive(parsed.id);

  if (alive == true) {
    spinner.text = `"${parsed.title}" (${parsed.id}) is still alive. Dumping JSON data...`;
    spinner.succeed();
  } else {
    spinner.text = `Checking if "${parsed.title}" (${parsed.id}) is available on Archive.org...`;
    const archived = await lib.isArchived(parsed.id);
    const thumbnail = await lib.findThumbnail(parsed.id);
    if (archived.success == true) {
      spinner.text = `Found "${parsed.title}" (${parsed.id}) is on Archive.org. Dumping JSON data...`;
      spinner.succeed();
      parsed.findable = archived.results;
    } else {
      // This branch previously claimed the video WAS found and left the
      // spinner running; report the miss and finalise the spinner.
      spinner.text = `"${parsed.title}" (${parsed.id}) was not found on Archive.org. Dumping metadata JSON data...`;
      spinner.fail();
    }
    parsed.thumbnail = thumbnail;
  }

  writeOrPrint(parsed, output);
  process.exit();
}

/**
 * Classifies every collected channel video as still available on YouTube,
 * gone but archived, or lost, then prints or writes the result.
 *
 * @param {object[]} videos - De-duplicated videos collected from the archives.
 * @param {object} spinner - ora spinner used for progress output.
 * @param {string|null} output - Output file path, or null for stdout.
 * @returns {Promise<void>}
 */
async function reportChannelVideos(videos, spinner, output) {
  const data = { available: [], archived: [], lost: [] };

  console.log(`\n\nFound ${videos.length} videos.`);
  if (videos.length > 0) {
    console.log(`Checking if any are missing now (and if so, if they are on archive.org)...`);
  }

  for (const [index, video] of videos.entries()) {
    spinner.start();
    spinner.text = `Checking videos (${index}/${videos.length})...`;
    spinner.color = "red";

    const alive = await lib.isAlive(video.id);
    if (alive == true) {
      spinner.text = `"${video.title}" (${video.id}) is still on YouTube.`;
      data.available.push(video);
      spinner.succeed();
      continue;
    }

    spinner.text = `Checking archive.org for "${video.title}" (${video.id})...`;
    const archived = await lib.isArchived(video.id, video.title);
    if (archived.success != true) {
      spinner.text = `"${video.title}" (${video.id}) is gone from YouTube and seemingly not on archive.org.`;
      data.lost.push(video);
      spinner.fail();
      continue;
    }

    spinner.text = `"${video.title}" (${video.id}) is gone from YouTube but is archived.`;
    spinner.succeed();

    const { details, thumbnail } = await fetchArchivedMetadata(video, spinner);
    const entry = { ...details, findable: archived.results, thumbnail };
    data.archived.push(entry);
    console.log(`\n\nMore info:`, entry, `\n\n`);
  }

  writeOrPrint(data, output);
}

/**
 * Best-effort lookup of extra metadata for an archived video: its thumbnail
 * and, when an archive of the video page exists and can be parsed, the parsed
 * page data in place of the channel-list entry.
 *
 * Failures are logged and never propagate; the original entry is kept.
 *
 * @param {object} video - Channel-list entry (reads `id` and `source`).
 * @param {object} spinner - ora spinner used for progress output.
 * @returns {Promise<{details: object, thumbnail: *}>}
 */
async function fetchArchivedMetadata(video, spinner) {
  let details = video;
  let thumbnail;

  spinner.start();
  spinner.text = `Attempting to retrieve addtional metadata...`;
  try {
    spinner.text = `Attempting to retrieve addtional metadata (fetching thumbnail for id "${video.id}")...`;
    thumbnail = await lib.findThumbnail(video.id);

    spinner.text = `Attempting to retrieve addtional metadata (getting video page archive info)...`;
    const vidArchive = await lib.fetchArchive(`https://www.youtube.com/watch?v=${video.id}`, video.source);
    if (vidArchive) {
      const vidArchiveParsed = lib.handleArchiveUrl(vidArchive);
      spinner.text = `Attempting to retrieve addtional metadata (getting video page)...`;
      const meta = await vidArchiveParsed?.parser?.parse(vidArchiveParsed.direct);
      if (meta != null) details = meta;
    }
    // Stop the spinner so it does not keep animating underneath the caller's
    // output (it used to be left running on this path).
    spinner.stop();
  } catch (err) {
    details = video;
    spinner.text = `Failed to get additional metadata, skipping...`;
    spinner.fail();
    console.log(err);
  }

  return { details, thumbnail };
}