269 lines
7.3 KiB
JavaScript
269 lines
7.3 KiB
JavaScript
const axios = require("axios");
|
|
const cheerio = require("cheerio");
|
|
const ytdl = require("ytdl-core");
|
|
|
|
/**
 * Registry of the available page parsers.
 *
 * Each entry names a parser module (`src`, loaded relative to this file by
 * `getParser`) and the snapshot dates it handles. Every date entry has a
 * `year`; `getParser` also honours an optional `month` on an entry.
 */
const parsers = [
  {
    src: "parsers/2015.js",
    dates: [
      { year: 2015 },
      { year: 2016 },
      { year: 2017 },
      { year: 2018 },
      { year: 2019 },
    ],
  },
];
|
|
|
|
module.exports = {
|
|
getCompatibleDates: getCompatibleDates, // get all parsible dates
|
|
handleArchiveUrl: handleArchiveUrl, // parser of web.archive.org urls
|
|
toNum: toNum, // ex: get "3K" to become 3000
|
|
fetchArchive: fetchArchive, // use wayback api to get snapshot of a URL
|
|
altUrl: altUrl, // fetch an alternative URL to the one given
|
|
isAlive: isAlive, // check if a youtube video is still alive
|
|
isArchived: isArchived, // check if a youtube video is archived
|
|
findThumbnail: findThumbnail // fetch the highest quality thumbnail
|
|
}
|
|
|
|
/**
 * Collect every date entry declared by the registered parsers.
 *
 * @returns {Array<object>} The `dates` entries of all parsers, in registry
 *   order, gathered into one new flat array. The entries are the same objects
 *   that are stored in `parsers`, not copies.
 */
function getCompatibleDates() {
  // flatMap visits array elements only. The previous for...in loops walked
  // property keys, which also picks up anything enumerable that was added to
  // Array.prototype.
  return parsers.flatMap((parser) => parser.dates ?? []);
}
|
|
|
|
/**
 * Break a web.archive.org snapshot URL into its components.
 *
 * @param {?string} url - Snapshot URL, e.g.
 *   `https://web.archive.org/web/20150316123456/https://www.youtube.com/watch?v=...`.
 * @returns {?object} `null` when `url` is null/undefined, otherwise an object with:
 *   - `type`: `"direct"` when the timestamp ends in `im_` or `if_`, else `"friendly"`
 *   - `direct` / `friendly`: both variants of the snapshot URL
 *   - `date`: `{ year, month, day, hour, minute, second }` read from the timestamp
 *     (fields missing from a shortened timestamp come out as `NaN`)
 *   - `url`: the archived (original) URL
 *   - `urlType`: `"video"`, `"playlist"`, `"channel"`, one of the `"channel_*"`
 *     tab types, or `"unknown"`
 *   - `parser`: whatever `getParser(date)` returns
 * @throws {TypeError} If `url` cannot be parsed by the `URL` constructor.
 * @throws {Error} If the path contains no `/web/<timestamp>` segment.
 */
function handleArchiveUrl(url) {
  if (url == null) return null;

  const WEB_PREFIX = "/web/";
  const parsedUrl = new URL(url);
  const { href } = parsedUrl;

  let date = parsedUrl.pathname.split(WEB_PREFIX)[1]?.split("/")[0];
  if (!date) throw new Error("Invalid URL.");

  // Everything after the FIRST "/web/" is "<timestamp>/<archived url>".
  // Slice by position instead of splitting on "/web/": the archived URL may
  // itself contain "/web/" (e.g. ".../user/web/videos") and must stay intact.
  const afterPrefix = href.slice(href.indexOf(WEB_PREFIX) + WEB_PREFIX.length);
  const firstSlash = afterPrefix.indexOf("/");
  const archivedUrl = firstSlash === -1 ? "" : afterPrefix.slice(firstSlash + 1);

  // A trailing "im_" / "if_" modifier marks the direct form.
  const isDirect = date.endsWith("im_") || date.endsWith("if_");
  if (isDirect) date = date.slice(0, -3);

  const obj = { type: isDirect ? "direct" : "friendly" };

  if (isDirect) {
    obj.direct = href;
    obj.friendly = `https://web.archive.org/web/${date}/${archivedUrl}`;
  } else {
    obj.friendly = href;
    obj.direct = `https://web.archive.org/web/${date}if_/${archivedUrl}`;
  }

  // Timestamp layout: YYYYMMDDhhmmss.
  const field = (start, end) => Number.parseInt(date.substring(start, end), 10);
  obj.date = {
    year: field(0, 4),
    month: field(4, 6),
    day: field(6, 8),
    hour: field(8, 10),
    minute: field(10, 12),
    second: field(12, 14),
  };

  obj.url = archivedUrl;

  const channelMarkers = ["/user/", "/channel/", "/c/", "/profile?"];
  const channelTabs = [
    ["/videos", "channel_videos"],
    ["/playlists", "channel_playlists"],
    ["/channels", "channel_featured_channels"],
    ["/discussion", "channel_discussion"],
    ["/about", "channel_about"],
  ];

  if (channelMarkers.some((marker) => archivedUrl.includes(marker))) {
    const tab = channelTabs.find(([suffix]) => archivedUrl.endsWith(suffix));
    obj.urlType = tab ? tab[1] : "channel";
  } else if (archivedUrl.includes("watch")) {
    obj.urlType = "video";
  } else if (archivedUrl.includes("playlist")) {
    obj.urlType = "playlist";
  } else {
    obj.urlType = "unknown";
  }

  obj.parser = getParser(obj.date);

  return obj;
}
|
|
|
|
function getParser(date) {
|
|
for (let i in parsers) {
|
|
for (let a in parsers[i].dates) {
|
|
let pDate = parsers[i].dates[a];
|
|
if (pDate.year == date.year) {
|
|
if (
|
|
!pDate.month ||
|
|
pDate.month == date.month
|
|
) return require(`${__dirname}/${parsers[i].src}`)
|
|
}
|
|
}
|
|
}
|
|
|
|
return null;
|
|
}
|
|
|
|
// Multipliers for the magnitude suffixes toNum understands.
const SUFFIX_MULTIPLIERS = new Map([
  ["k", 1e3],
  ["m", 1e6],
  ["b", 1e9],
]);

/**
 * Convert a human-readable count such as "3K", "1.5M views" or "1,234" into a
 * number.
 *
 * @param {*} string - Text to convert. Non-string values are returned as-is.
 * @returns {*} The parsed number, scaled by 1e3 / 1e6 / 1e9 when the text
 *   that follows the number is exactly "k" / "m" / "b" (case-insensitive).
 *   `NaN` when no number can be read. The input itself when it is not a string.
 */
function toNum(string) {
  if (typeof string !== "string") return string;

  const cleaned = string
    .toLowerCase()
    // Drop thousands separators and ALL whitespace, including the
    // non-breaking spaces some locales use to group digits.
    .replace(/[,\s]/g, "")
    .replace(/views/g, "") // remove the English label if it's there
    .replace(/переглядів/g, ""); // remove the Ukrainian label

  // Whatever follows the leading number is the candidate suffix.
  const suffix = cleaned.replace(/^[+-]?\d*(\.\d+)?/, "");
  const value = parseFloat(cleaned.replace(/[kmb]/g, ""));

  const multiplier = SUFFIX_MULTIPLIERS.get(suffix);
  if (multiplier === undefined) return value;

  // Round to 15 significant digits so binary floating-point noise from the
  // multiplication (1.005 * 1000 -> 1004.9999999999999) does not leak out.
  return Number((value * multiplier).toPrecision(15));
}
|
|
|
|
/**
 * Ask the Wayback availability API for the snapshot of `url` closest to `date`.
 *
 * @param {string} url - The URL to look up.
 * @param {{year?: number, month?: number, day?: number}} [date] - Preferred
 *   snapshot date; the parts that are present are sent as the `timestamp`.
 * @returns {Promise<?string>} The closest snapshot's URL, or `null` when there
 *   is none, when the snapshot's recorded status is neither 2xx nor 3xx, or
 *   when the request fails (the error is logged).
 */
async function fetchArchive(url, date) {
  try {
    let timestamp = "";

    if (date) {
      if (date.year) timestamp = `${date.year}`;
      if (date.month) timestamp += pad(date.month, 2);
      if (date.day) timestamp += pad(date.day, 2);
    }

    const resp = await axios({
      url: `https://archive.org/wayback/available?url=${encodeURIComponent(url)}&timestamp=${timestamp}`,
      validateStatus: () => true,
    });

    const closest = resp.data?.archived_snapshots?.closest;
    const status = closest?.status;

    // Prevents dead links: a snapshot whose recorded status is not 2xx/3xx is
    // an archived error page. A snapshot without a status is still accepted.
    if (status != null && !/^[23]/.test(String(status))) return null;

    return closest?.url || null;
  } catch (err) {
    console.log(err);
    return null;
  }
}
|
|
|
|
/**
 * Fetch a page and return the link of its author.
 *
 * The page at `url` is requested first. When it answers 404, a snapshot is
 * looked up through `fetchArchive` and fetched instead. The returned value is
 * the `href` of the `[itemprop="author"] > [itemprop="url"]` element.
 *
 * @param {string} url - Page to inspect.
 * @returns {Promise<?string|undefined>} The author link; `undefined` when the
 *   page has no such element; `null` when the page is gone and no snapshot
 *   exists, or when any request/parsing step throws.
 */
async function altUrl(url) {
  try {
    const acceptAnyStatus = () => true;

    let resp = await axios({ url, validateStatus: acceptAnyStatus });

    if (resp.status === 404) {
      // fetchArchive is async: without the await, a Promise (not a URL) was
      // handed to axios and the fallback could never work.
      const archivedUrl = await fetchArchive(url);
      if (!archivedUrl) return null;

      resp = await axios({ url: archivedUrl, validateStatus: acceptAnyStatus });
    }

    const $ = cheerio.load(resp.data);
    return $(`[itemprop="author"] > [itemprop="url"]`).attr("href");
  } catch (err) {
    // Deliberate best effort: any failure just means "no alternative URL".
    return null;
  }
}
|
|
|
|
/**
 * Left-pad a value with zeros.
 *
 * @param {number|string} num - Value to pad; it is converted to a string first.
 * @param {number} size - Minimum length of the result.
 * @returns {string} `num` as a string, prefixed with "0" until it is at least
 *   `size` characters long. Longer values are returned unchanged.
 */
function pad(num, size) {
  return String(num).padStart(size, "0");
}
|
|
|
|
/**
 * Check whether a YouTube video can still be looked up.
 *
 * @param {string} url - Video URL handed to `ytdl.getBasicInfo`.
 * @returns {Promise<boolean>} `true` when `ytdl.getBasicInfo` succeeds,
 *   `false` when it throws or rejects for any reason.
 */
async function isAlive(url) {
  let alive = true;

  try {
    await ytdl.getBasicInfo(url);
  } catch {
    // Any failure of the lookup counts as "not alive".
    alive = false;
  }

  return alive;
}
|
|
|
|
/**
 * Look for archived copies of a YouTube video.
 *
 * Checks, in order:
 *   1. a snapshot of `http://wayback-fakeurl.archive.org/yt/<id>` via `fetchArchive`;
 *   2. the archive.org item `https://archive.org/details/youtube-<id>`;
 *   3. only when both found nothing and a `title` was given: the first
 *      archive.org search hit, accepted if its title equals `title`
 *      case-insensitively.
 *
 * @param {string} url - Video URL containing `v=<id>`, or the bare video id.
 * @param {string} [title] - Video title used for the search fallback.
 * @returns {Promise<{success: boolean, results: string[]}>} `results` lists
 *   the URLs found; `success` is `true` when there is at least one.
 */
async function isArchived(url, title) {
  const results = [];
  const acceptAnyStatus = () => true;

  // Take the id from the "v=" query parameter; otherwise treat the input as the id.
  const id = url?.split("v=")?.[1]?.split("&")?.[0] || url;

  const direct = await fetchArchive(`http://wayback-fakeurl.archive.org/yt/${id}`);
  if (direct !== null) results.push(handleArchiveUrl(direct).direct);

  const detailsUrl = `https://archive.org/details/youtube-${id}`;
  const detailsResp = await axios({ url: detailsUrl, validateStatus: acceptAnyStatus });
  if (detailsResp.status === 200) results.push(detailsUrl);

  if (results.length === 0 && title) {
    const searchResp = await axios({
      url: `https://archive.org/search.php?query=${encodeURIComponent(title)}`,
      // Same policy as the other requests here: an error status means
      // "nothing found", not a rejected promise.
      validateStatus: acceptAnyStatus,
    });

    const $ = cheerio.load(searchResp.data);
    const item = $(".C234 > div > a")?.[0];
    const itemTitle = $(".C234 > div > a > .ttl")?.[0]?.children?.[0]?.data;

    // A hit without a readable title is skipped instead of throwing.
    if (
      item &&
      typeof itemTitle === "string" &&
      title.toLowerCase() === itemTitle.toLowerCase()
    ) {
      results.push(`https://archive.org${item?.attribs?.href}`);
    }
  }

  return {
    success: results.length > 0,
    results,
  };
}
|
|
|
|
/**
 * Look for an archived thumbnail of a YouTube video.
 *
 * Every combination of thumbnail quality (best first) and image host is
 * looked up through `fetchArchive`, one at a time, until one is found.
 *
 * @param {string} url - Video URL containing `v=<id>`, or the bare video id.
 * @returns {Promise<?string>} The first snapshot URL found, or `null` when
 *   none of the candidates is archived.
 */
async function findThumbnail(url) {
  // Take the id from the "v=" query parameter; otherwise treat the input as the id.
  const id = url?.split("v=")?.[1]?.split("&")?.[0] || url;

  const qualities = ["maxresdefault", "hqdefault"];

  const servers = [
    "i.ytimg.com",
    "i2.ytimg.com",
    "i3.ytimg.com",
    "i4.ytimg.com",
    "img.ytimg.com",
  ];

  for (const quality of qualities) {
    for (const server of servers) {
      // fetchArchive takes the URL string itself; wrapping it in an object
      // made every lookup query "[object Object]".
      const archived = await fetchArchive(`https://${server}/vi/${id}/${quality}.jpg`);
      if (archived !== null) return archived;
    }
  }

  return null;
}