280 lines
11 KiB
JavaScript
280 lines
11 KiB
JavaScript
import axios from "axios";
|
|
import * as cheerio from "cheerio";
|
|
import twocaptcha from "2captcha";
|
|
import fs from "fs";
|
|
|
|
// Runtime settings, parsed once when this module is evaluated. The file is
// looked up as `config.json` in the current working directory of the process.
const configFile = `${process.cwd()}/config.json`;
const config = JSON.parse(fs.readFileSync(configFile));
|
|
|
|
/**
 * Archive `url` with every enabled archiver, one after the other
 * (Wayback Machine first, then archive.is).
 *
 * @param {string} url - Page to archive.
 * @param {object} [options]
 * @param {boolean} [options.wayback] - Run the Wayback Machine archiver when loosely equal to `true`.
 * @param {boolean} [options.archivedotis] - Run the archive.is archiver when loosely equal to `true`.
 * @param {boolean} [options.log] - Console logging is on unless this is loosely equal to `false`.
 * @param {function(object): void} [onUpdate] - Receives progress objects of the form
 *   `{currentArchiver, status, message?, url?}`.
 * @returns {Promise<{wayback: object, archivedotis: object}>} One entry per archiver:
 *   the archiver's own result, `{status: "disabled"}` when it was not requested, or
 *   `{status: "error", message}` when it failed. Archiver failures never reject this promise.
 */
async function create(url, {wayback, archivedotis, log} = {}, onUpdate) {
  const notify = onUpdate || function() {};
  // Loose comparison kept so that existing callers passing 0/1 behave as before.
  const verbose = log != false;

  if (verbose) {
    console.log(`request to create archive recieved`);
    console.log(`- url:`, url);
    console.log(`- archive.org:`, wayback);
    console.log(`- archive.is:`, archivedotis);
  }

  // Runs a single archiver and converts any failure into an error result, so
  // that one archiver failing neither aborts the other nor leaves its entry undefined.
  const runArchiver = async ({name, enabled, startLog, startUpdate, archive}) => {
    if (enabled != true) return { status: "disabled" };

    try {
      if (verbose) console.log(startLog);
      notify({currentArchiver: name, ...startUpdate});
      const result = await archive();
      notify({currentArchiver: name, status: result.status, url: result.url});
      return result;
    } catch (err) {
      // String(err) covers rejections with a non-Error value (e.g. a bare string).
      const message = err?.message || err?.stack || err?.code || String(err);
      notify({currentArchiver: name, status: "error", message});
      return { status: "error", message };
    }
  };

  const wba = await runArchiver({
    name: "wayback",
    enabled: wayback,
    startLog: `[archiver] sending request to backup to the wayback machine`,
    startUpdate: {status: "pending", message: "starting process"},
    archive: () => archiveorg(url, {log, captureAll: true}, notify)
  });

  const adi = await runArchiver({
    name: "archivedotis",
    enabled: archivedotis,
    startLog: `[archiver] sending request to backup to archive.is`,
    startUpdate: {status: "starting"},
    archive: () => archiveis(url, {log}, notify)
  });

  return { wayback: wba, archivedotis: adi };
}
|
|
|
|
/**
 * Submit a URL through the Wayback Machine save form (https://web.archive.org/save)
 * and poll the resulting job until it finishes.
 *
 * @param {string} captureUrl - Page to archive.
 * @param {object} [options]
 * @param {boolean} [options.log] - Console logging is on unless this is loosely equal to `false`.
 * @param {boolean} [options.captureAll] - Sent as `capture_all=on` unless loosely equal to `false`.
 * @param {number} [options.pollIntervalMs=6000] - Delay before each job status request.
 * @param {function(object): void} [onUpdate] - Called with a "pending" update on every
 *   status request that reports the job as still pending.
 * @returns {Promise<{status: string, url: string, duration: number}>} The snapshot URL and
 *   the capture duration in milliseconds.
 * @throws {Error} When the form reports an error, no job id can be found in the response,
 *   the job reports an error or an unknown status, or any HTTP request fails.
 */
async function archiveorg(captureUrl, {log, captureAll, pollIntervalMs = 6000} = {}, onUpdate) {
  const verbose = log != false;
  const notify = onUpdate || function() {};

  // The form field is `capture_all`; the previous `captcha_all` spelling was a typo,
  // so the option never reached the server under its real name.
  const data = `url=${encodeURIComponent(captureUrl)}&capture_all=${captureAll != false ? "on" : "off"}`;

  if (verbose) console.log(`[wayback] sending initial request to begin archiving...`);

  const init = await axios({
    method: `POST`,
    url: `https://web.archive.org/save/${encodeURIComponent(captureUrl)}`,
    data,
    headers: {
      "Content-Length": Buffer.byteLength(data),
      "Content-Type": "application/x-www-form-urlencoded",
      "Referer": "https://web.archive.org/save"
    }
  });

  // The job id is embedded in the returned page as the first argument of `spn.watchJob("...", ...)`.
  const page = typeof init.data === "string" ? init.data : "";
  const spinJob = page.split(`spn.watchJob("`)[1]?.split(`",`)[0];

  if (!spinJob) {
    // Without a job id there is nothing to poll: surface the form error if the page has one.
    const formError = cheerio.load(page)(".error.save-page-form");
    if (formError.length > 0) throw new Error(formError.text().trim());
    throw new Error("could not find the capture job id in the response.");
  }

  if (verbose) console.log(`[wayback] got response from request, spin job id:`, spinJob);

  // Sequential polling: the next request is only scheduled after the previous one
  // has been handled, and any request failure rejects the returned promise.
  for (;;) {
    await new Promise((wake) => setTimeout(wake, pollIntervalMs));

    const {data: job} = await axios({
      method: `GET`,
      // `t` carries the current time; presumably only a cache-buster — TODO confirm.
      url: `https://web.archive.org/save/status/${spinJob}?t=${Date.now()}`
    });

    if (job.status == "success") {
      return {
        status: "success",
        url: `https://web.archive.org/web/${job.timestamp}/${job.original_url}`,
        duration: job.duration_sec * 1000
      };
    }

    if (job.status == "pending") {
      notify({currentArchiver: "wayback", status: "pending", message: "archiving"});
      continue;
    }

    if (job.status == "error") throw new Error(job.message);

    throw new Error("cannot parse job response.");
  }
}
|
|
|
|
/**
 * Build the browser-like request headers sent with every archive.is request.
 * Entries in `extra` are added to, or override, the shared defaults.
 */
function archiveisHeaders(extra = {}) {
  return {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "keep-alive",
    "DNT": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "cross-site",
    "Sec-GPC": "1",
    "TE": "trailers",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:110.0) Gecko/20100101 Firefox/110.0",
    ...extra
  };
}

/**
 * Submit a URL to archive.ph, solving the captcha through 2captcha when the
 * submission response contains a captcha site key, and wait for the snapshot.
 *
 * @param {string} captureUrl - Page to archive.
 * @param {object} [options]
 * @param {boolean} [options.log] - Console logging is on unless this is loosely equal to `false`.
 * @param {boolean} [options.autoAgain] - Accepted for backward compatibility; it was never
 *   read by this function and has no effect.
 * @param {number} [options.pollIntervalMs=5000] - Delay before each work-in-progress page request.
 * @param {function(object): void} [onUpdate] - Receives `{currentArchiver: "archivedotis", status, message}`
 *   progress objects.
 * @returns {Promise<{status: string, url: string, duration: (number|null), new: boolean}>}
 *   `new` is `false` (and `duration` is `null`) when the response already pointed at an
 *   existing snapshot, `true` when a new snapshot was produced.
 * @throws {Error} When a request outside the polling loop fails, the captcha check returns
 *   no cookie, or the submission response contains neither a refresh header nor a canonical link.
 */
async function archiveis(captureUrl, {log, autoAgain, pollIntervalMs = 5000} = {}, onUpdate) {
  const verbose = log != false;
  const pending = (message) => {
    if (onUpdate) onUpdate({currentArchiver: "archivedotis", status: "pending", message});
  };

  if (verbose) console.log(`[archiveis] fetching initial request to get request id`);
  pending("fetching request id");

  // The home page carries the `submitid` token that the submit endpoint expects.
  const home = await axios({
    url: `https://archive.ph/`,
    headers: archiveisHeaders()
  });
  const subId = cheerio.load(home.data)(`[name="submitid"]`).val();

  if (verbose) console.log(`[archiveis] got submit id:`, subId);
  const subUrl = `https://archive.ph/submit/?anyway=1&submitid=${subId}&url=${encodeURIComponent(captureUrl)}`;
  if (verbose) console.log(`[archiveis] got submit url:`, subUrl);

  // Every status code is accepted here; the response is interpreted below.
  const submit = (extraHeaders = {}) => axios({
    url: subUrl,
    headers: archiveisHeaders({"Referer": "https://archive.ph/", ...extraHeaders}),
    validateStatus: function() {return true;}
  });

  pending("initializing submission");
  let msStart = Date.now();
  let res = await submit();

  // A captcha page is recognised by the site key embedded in it. (The previous check,
  // `typeof title == "string"`, was always true and forced the captcha flow on every call.)
  const siteKey = typeof res.data === "string"
    ? res.data.split(`'sitekey': '`)[1]?.split(`'`)[0]
    : undefined;

  let cookie;

  if (siteKey) {
    pending("sending request for captcha");
    if (verbose) console.log(`[archiveis] sending captcha request`);

    const solver = new twocaptcha.Solver(config["2captcha"]);
    pending("solving captcha");
    if (verbose) console.log(`[archiveis] getting captcha solution`);
    const solution = await solver.recaptcha(siteKey, subUrl);

    // NOTE(review): `location` is sent unencoded, exactly as before; since subUrl contains
    // `&` and `=`, confirm against the endpoint whether it should be percent-encoded.
    const captData = `response=${encodeURIComponent(solution.data)}&location=${subUrl}`;

    const {headers} = await axios({
      method: `POST`,
      url: `https://archive.ph/cdn-cgi/l/chk_captcha`,
      data: captData,
      headers: archiveisHeaders({
        "Content-Length": Buffer.byteLength(captData),
        "Content-Type": "application/x-www-form-urlencoded",
        "Referer": "https://archive.ph/",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin"
      })
    });

    // Keep only the `name=value` pair of the first cookie and send it back unchanged,
    // so values that themselves contain `=` are no longer truncated.
    cookie = headers["set-cookie"]?.[0]?.split(`;`)[0];
    if (!cookie) throw new Error("captcha check did not return a cookie.");

    pending("resending submission");
    msStart = Date.now();
    if (verbose) console.log(`[archiveis] resending archive request`);
    res = await submit({"Cookie": cookie});
  }

  const refresh = res.headers?.["refresh"];

  if (!refresh) {
    // No refresh header: the response is expected to be an existing snapshot whose
    // address is in its canonical link. Returning here means no polling is started.
    const url = cheerio.load(res.data)(`link[rel="canonical"]`).attr("href");
    if (!url) throw new Error("submission response had neither a refresh header nor a canonical link.");
    return { status: "success", url, duration: null, new: false };
  }

  // The refresh header has the form `<seconds>;url=<work-in-progress page>`.
  const wip = new URL(refresh.split(`url=`).slice(1).join(`url=`), `https://archive.ph/`);
  // The snapshot id is the second path segment of the work-in-progress URL.
  const id = wip.pathname.split(`/`)[2];
  if (!id) throw new Error(`cannot find a snapshot id in refresh target "${wip.href}".`);

  const pollHeaders = archiveisHeaders({
    "Referer": wip.href,
    ...(cookie ? {"Cookie": cookie} : {})
  });

  let updateNum = 1;

  for (;;) {
    await new Promise((wake) => setTimeout(wake, pollIntervalMs));

    let headers;
    try {
      ({headers} = await axios({
        method: "GET",
        url: wip.href,
        validateStatus: function() {return true;},
        headers: pollHeaders
      }));
    } catch (err) {
      // Best effort, as before: a failed poll is logged and retried on the next tick.
      console.log(err);
      continue;
    }

    // A `link` response header on the work-in-progress page is treated as "snapshot ready".
    if (headers.link) {
      pending("parsing result page");
      if (verbose) console.log(`[archiveis] got id:`, id);
      return { status: "success", url: `https://archive.ph/${id}`, duration: (Date.now() - msStart), new: true };
    }

    if (verbose) console.log(`[archiveis] no redirect found, refreshing in ${pollIntervalMs / 1000} seconds`);
    updateNum = updateNum + 1;
    pending(`archiving (request ${updateNum})`);
  }
}
|
|
|
|
// Public surface of this module: the default export is an object whose only
// member is `create`.
const archiverApi = { create };

export default archiverApi;