From aec7db0183ada9cec57d55b53acede8efe1c51d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matt=C3=A9o=20Delabre?= Date: Sun, 29 Nov 2020 21:05:21 +0100 Subject: [PATCH] Improve behavior to reduce rate limiting issues --- lib/api.mjs | 18 +++++++++++++++--- lib/explore.mjs | 24 +++++++++--------------- lib/retry.mjs | 42 +++++++++++++++++++++++++++++++++++++----- lib/util.mjs | 46 ---------------------------------------------- 4 files changed, 61 insertions(+), 69 deletions(-) diff --git a/lib/api.mjs b/lib/api.mjs index c9d0723..6f159bd 100644 --- a/lib/api.mjs +++ b/lib/api.mjs @@ -3,6 +3,7 @@ import util from 'util'; import fetch from 'node-fetch'; import { WATCH_BASE } from './util.mjs'; +import { retryable, exclusive } from './retry.mjs'; import { TemporaryError } from './retry.mjs'; const log = debug('youtube-maze:api'); @@ -16,13 +17,21 @@ const PLAYER_CONFIG_REGEX = /ytplayer\.config = (\{.*?\});/; * @param String videoId Identifier of the video to fetch. * @return Object The player configuration object. */ -export const getPlayerConfig = async (videoId) => +const bareGetPlayerConfig = async (videoId) => { const url = util.format(WATCH_BASE, encodeURIComponent(videoId)); log(`Fetching ${videoId} (${url})`); - const res = await fetch(url); - debugger; + + if (res.status === 429) + { + throw new TemporaryError('Too many requests'); + } + else if (res.status !== 200) + { + throw new Error(`Invalid YouTube HTTP status: ${res.status}`); + } + const body = await res.text(); // Look for the initial player response object to check whether the @@ -60,9 +69,12 @@ reason: "${response.playabilityStatus.reason}"`); throw new Error(`Video ${videoId} has actual id ${actualId}`); } + log(`Done fetching ${videoId}`); return config; }; +export const getPlayerConfig = exclusive(retryable(bareGetPlayerConfig)); + /** * Get metadata about a YouTube video. 
* diff --git a/lib/explore.mjs b/lib/explore.mjs index ddfa724..927d6b3 100644 --- a/lib/explore.mjs +++ b/lib/explore.mjs @@ -1,13 +1,10 @@ import util from 'util'; import * as api from './api.mjs'; -import { WATCH_BASE, PromiseQueue, escapeQuotes } from './util.mjs'; -import { retryable } from '../lib/retry.mjs'; +import { WATCH_BASE, escapeQuotes } from './util.mjs'; const GRAPH_NODE = ' "%s" [label="%s", URL="%s", fontcolor=blue]'; const GRAPH_LINK = ' "%s" -> "%s"'; -const retryPlayerConfig = retryable(api.getPlayerConfig); - /** * Explore the video graph starting from the given root. * @@ -19,23 +16,22 @@ export const exploreVideos = async videoId => { // Store metadata about each visited video const videosNodes = Object.create(null); - videosNodes[videoId] = {}; // List of videos linked from each video either through a card or an // endscreen item const nextVideos = Object.create(null); - nextVideos[videoId] = new Set(); - // Pending video requests - const queue = new PromiseQueue(); - queue.add(retryPlayerConfig(videoId)); + // Videos that still need to be explored + const queue = [videoId]; - while (!queue.empty()) + while (queue.length > 0) { - const config = await queue.next(); + const currentId = queue.shift(); + const config = await api.getPlayerConfig(currentId); const meta = api.getVideoMeta(config); - videosNodes[meta.videoId] = meta; + videosNodes[currentId] = meta; + nextVideos[currentId] = new Set(); // Add links between this video and the linked ones api.getEndScreenVideos(config) @@ -47,9 +43,7 @@ export const exploreVideos = async videoId => { if (!(nextId in videosNodes)) { - videosNodes[nextId] = {}; - nextVideos[nextId] = new Set(); - queue.add(retryPlayerConfig(nextId)); + queue.push(nextId); } } } diff --git a/lib/retry.mjs b/lib/retry.mjs index 3ff4d3e..e5a35be 100644 --- a/lib/retry.mjs +++ b/lib/retry.mjs @@ -4,8 +4,10 @@ import { sleep } from './util.mjs'; const log = debug('youtube-maze:retry'); /** - * An error that is expected to 
be temporary such that the initial action - * may be retried. + * An error that is expected to be temporary. + * + * If this error is raised, the initial action may be retried after a short + * period of time, and may eventually succeed. */ export class TemporaryError extends Error { @@ -32,11 +34,11 @@ export const retryable = (func, retries = 3, cooldown = 1000) => { return async (...args) => { + let remRetries = retries; + let curCooldown = cooldown; + while (true) { - let remRetries = retries; - let curCooldown = cooldown; - try { const result = await func(...args); @@ -62,3 +64,33 @@ Retrying in ${curCooldown} ms (${remRetries} retries remaining)`); } }; }; + +/** + * Make an async function mutually exclusive. + * + * Only one execution of the async function may happen at the same time. + * In the meantime, other requests are added to a queue. + * + * @param function func Async function to call. + * @param number cooldown Time to wait between two executions. + */ +export const exclusive = (func, cooldown = 2000) => +{ + let pending = null; + + return async (...args) => + { + if (pending === null) + { + pending = func(...args); + } + else + { + pending = pending + .then(() => sleep(cooldown)) + .then(() => func(...args)); + } + + return pending; + }; +}; diff --git a/lib/util.mjs b/lib/util.mjs index 49d9ec4..f1302b3 100644 --- a/lib/util.mjs +++ b/lib/util.mjs @@ -3,52 +3,6 @@ import util from 'util'; export const YOUTUBE_BASE = 'https://www.youtube.com/%s'; export const WATCH_BASE = util.format(YOUTUBE_BASE, 'watch?v=%s'); -/** - * Hold a queue of promises from which results can be extracted. - */ -export class PromiseQueue -{ - constructor() - { - this.pending = new Set(); - } - - /** - * Add a new promise to the queue. - * - * @param Promise promise Promise to be added. 
- */ - add(promise) - { - const wrapped = promise.then(res => { - this.pending.delete(wrapped); - return res; - }); - - this.pending.add(wrapped); - } - - /** - * Check whether there is no promise pending in the queue. - * - * @return boolean - */ - empty() - { - return this.pending.size === 0; - } - - /** - * Extract the next available result from the promise queue. - * - * @return any Next result. - */ - next() - { - return Promise.race(this.pending); - } -} - /** * Escape double quotes in a string. */