Improve behavior to reduce rate limiting issues

This commit is contained in:
Mattéo Delabre 2020-11-29 21:05:21 +01:00
parent 731b51631d
commit aec7db0183
Signed by: matteo
GPG Key ID: AE3FBD02DC583ABB
4 changed files with 61 additions and 69 deletions

View File

@ -3,6 +3,7 @@ import util from 'util';
import fetch from 'node-fetch';
import { WATCH_BASE } from './util.mjs';
import { retryable, exclusive } from './retry.mjs';
import { TemporaryError } from './retry.mjs';
const log = debug('youtube-maze:api');
@ -16,13 +17,21 @@ const PLAYER_CONFIG_REGEX = /ytplayer\.config = (\{.*?\});/;
* @param String videoId Identifier of the video to fetch.
* @return Object The player configuration object.
*/
export const getPlayerConfig = async (videoId) =>
const bareGetPlayerConfig = async (videoId) =>
{
const url = util.format(WATCH_BASE, encodeURIComponent(videoId));
log(`Fetching ${videoId} (${url})`);
const res = await fetch(url);
debugger;
if (res.status === 429)
{
throw new TemporaryError('Too many requests');
}
else if (res.status !== 200)
{
throw new Error(`Invalid YouTube HTTP status: ${res.status}`);
}
const body = await res.text();
// Look for the initial player response object to check whether the
@ -60,9 +69,12 @@ reason: "${response.playabilityStatus.reason}"`);
throw new Error(`Video ${videoId} has actual id ${actualId}`);
}
log(`Done fetching ${videoId}`);
return config;
};
export const getPlayerConfig = exclusive(retryable(bareGetPlayerConfig));
/**
* Get metadata about a YouTube video.
*

View File

@ -1,13 +1,10 @@
import util from 'util';
import * as api from './api.mjs';
import { WATCH_BASE, PromiseQueue, escapeQuotes } from './util.mjs';
import { retryable } from '../lib/retry.mjs';
import { WATCH_BASE, escapeQuotes } from './util.mjs';
const GRAPH_NODE = ' "%s" [label="%s", URL="%s", fontcolor=blue]';
const GRAPH_LINK = ' "%s" -> "%s"';
const retryPlayerConfig = retryable(api.getPlayerConfig);
/**
* Explore the video graph starting from the given root.
*
@ -19,23 +16,22 @@ export const exploreVideos = async videoId =>
{
// Store metadata about each visited video
const videosNodes = Object.create(null);
videosNodes[videoId] = {};
// List of videos linked from each video either through a card or an
// endscreen item
const nextVideos = Object.create(null);
nextVideos[videoId] = new Set();
// Pending video requests
const queue = new PromiseQueue();
queue.add(retryPlayerConfig(videoId));
// Videos that still need to be explored
const queue = [videoId];
while (!queue.empty())
while (queue.length > 0)
{
const config = await queue.next();
const currentId = queue.shift();
const config = await api.getPlayerConfig(currentId);
const meta = api.getVideoMeta(config);
videosNodes[meta.videoId] = meta;
videosNodes[currentId] = meta;
nextVideos[currentId] = new Set();
// Add links between this video and the linked ones
api.getEndScreenVideos(config)
@ -47,9 +43,7 @@ export const exploreVideos = async videoId =>
{
if (!(nextId in videosNodes))
{
videosNodes[nextId] = {};
nextVideos[nextId] = new Set();
queue.add(retryPlayerConfig(nextId));
queue.push(nextId);
}
}
}

View File

@ -4,8 +4,10 @@ import { sleep } from './util.mjs';
const log = debug('youtube-maze:retry');
/**
* An error that is expected to be temporary such that the initial action
* may be retried.
* An error that is expected to be temporary.
*
* If this error is raised, the initial action may be retried after a short
* period of time, and may eventually succeed.
*/
export class TemporaryError extends Error
{
@ -32,11 +34,11 @@ export const retryable = (func, retries = 3, cooldown = 1000) =>
{
return async (...args) =>
{
let remRetries = retries;
let curCooldown = cooldown;
while (true)
{
let remRetries = retries;
let curCooldown = cooldown;
try
{
const result = await func(...args);
@ -62,3 +64,33 @@ Retrying in ${curCooldown} ms (${remRetries} retries remaining)`);
}
};
};
/**
 * Make an async function mutually exclusive.
 *
 * Only one execution of the async function may happen at the same time.
 * The first call starts immediately; every later call is queued behind the
 * previously scheduled one and starts after that one has settled and the
 * cooldown has elapsed. Calls are executed in the order they were made.
 *
 * A failing execution only rejects the promise handed to its own caller:
 * calls queued behind it are still executed instead of inheriting the error.
 *
 * @param function func Async function to call.
 * @param number cooldown Time to wait between two executions (passed
 * unchanged to `sleep`).
 * @return function Wrapper taking the same arguments as `func` and returning
 * a promise for its result.
 */
export const exclusive = (func, cooldown = 2000) =>
{
    // Tail of the execution chain. It settles once the most recently
    // scheduled execution is over and never rejects. It stays null until
    // the wrapper is called for the first time.
    let pending = null;

    // Used to discard outcomes inside the internal chain
    const ignore = () => {};

    return async (...args) =>
    {
        const run = () => func(...args);

        // Promise.resolve() also accepts a func that returns a plain value,
        // so that the next call can always chain on the result
        const result = pending === null
            ? Promise.resolve(run())
            : pending.then(() => sleep(cooldown)).then(run);

        // Only the internal chain swallows the outcome: a rejection must
        // not prevent the following calls from running, but the caller
        // still observes it through `result`
        pending = result.then(ignore, ignore);

        return result;
    };
};

View File

@ -3,52 +3,6 @@ import util from 'util';
export const YOUTUBE_BASE = 'https://www.youtube.com/%s';
export const WATCH_BASE = util.format(YOUTUBE_BASE, 'watch?v=%s');
/**
 * Hold a queue of promises from which results can be extracted.
 *
 * Every promise added to the queue yields exactly one outcome through
 * `next()`, in the order in which the promises settle. Outcomes of promises
 * that settle while nobody is waiting are kept until they are requested.
 */
export class PromiseQueue
{
    constructor()
    {
        // Tracked promises that have not settled yet
        this.pending = new Set();

        // Tracked promises that have settled but whose outcome has not been
        // handed out by next() yet, in settlement order
        this.settled = [];
    }

    /**
     * Add a new promise to the queue.
     *
     * @param Promise promise Promise to be added.
     */
    add(promise)
    {
        // Derive a fresh promise so that adding the same promise twice
        // yields two distinct queue entries
        const tracked = Promise.resolve(promise).then(value => value);

        // Runs on fulfillment and on rejection alike, so a failed promise
        // leaves the pending set instead of staying there forever
        const markSettled = () => {
            this.pending.delete(tracked);
            this.settled.push(tracked);
        };

        tracked.then(markSettled, markSettled);
        this.pending.add(tracked);
    }

    /**
     * Check whether there is no outcome left to extract from the queue,
     * i.e. no promise is pending and no settled outcome is waiting.
     *
     * @return boolean
     */
    empty()
    {
        return this.pending.size === 0 && this.settled.length === 0;
    }

    /**
     * Extract the next available result from the promise queue.
     *
     * The returned promise rejects if the extracted promise rejected, or if
     * the queue holds nothing to wait for.
     *
     * @return any Next result.
     */
    async next()
    {
        const ignore = () => {};

        // Loop rather than wait once: a concurrent next() call may have
        // taken the outcome this call was woken up for
        while (this.settled.length === 0)
        {
            if (this.pending.size === 0)
            {
                // Racing an empty set would never settle
                throw new Error('No promise left in the queue');
            }

            // Only wait for a settlement here; the outcome itself is read
            // from the settled list below
            await Promise.race(
                [...this.pending].map(item => item.then(ignore, ignore))
            );
        }

        return this.settled.shift();
    }
}
/**
* Escape double quotes in a string.
*/