Remove graph lib, fix race condition, parameterize video id

This commit is contained in:
Mattéo Delabre 2020-11-28 00:17:04 +01:00
parent e2514185e8
commit bb74afb9ec
Signed by: matteo
GPG Key ID: AE3FBD02DC583ABB
5 changed files with 63 additions and 89 deletions

View File

@ -18,7 +18,7 @@ export const getPlayerConfig = videoId =>
return new Promise((resolve, reject) =>
{
request(url, (err, res, body) =>
request(url, (err, _, body) =>
{
if (err)
{

View File

@ -2,8 +2,8 @@
import util from 'util';
const GRAPH_NODE = ' "%s" [label="%s"]';
const GRAPH_NODE_URL = ' "%s" [label="%s", URL="%s", fontcolor=blue]';
const YOUTUBE_WATCH = 'https://youtu.be/%s';
const GRAPH_NODE = ' "%s" [label="%s", URL="%s", fontcolor=blue]';
const GRAPH_LINK = ' "%s" -> "%s"';
/**
@ -12,45 +12,33 @@ const GRAPH_LINK = ' "%s" -> "%s"';
const escapeQuotes = str => str.replace(/"/g, '\\"');
/**
* Convert a graph to the DOT format.
* Convert a graph of videos to the DOT format.
*
* @param graph Graph to convert.
* @param nodes Nodes of the graph.
* @param next For each node, list of next neighbors.
* @param [title=identity] Function giving the name of each node.
* @param [url=none] Function giving the URL of each node, or an empty string
* if a node has no URL.
* @return DOT representation of the graph.
*/
export const graphToDOT = (graph, title = id => id, url = () => '') =>
export const graphToDOT = (nodes, next) =>
{
const ser = graph.serialize();
// Convert nodes
const nodes = ser.nodes.map(({id}) =>
const nodesStr = Object.entries(nodes).map(([id, {title}]) =>
{
const nodeTitle = title(id);
const nodeUrl = url(id);
if (url === '')
{
return util.format(GRAPH_NODE, id, escapeQuotes(nodeTitle));
}
else
{
return util.format(GRAPH_NODE_URL, id, escapeQuotes(nodeTitle), nodeUrl);
}
}
).join('\n');
const url = util.format(YOUTUBE_WATCH, id);
return util.format(GRAPH_NODE, id, escapeQuotes(title), url);
}).join('\n');
// Convert edges
const links = ser.links.map(({source, target}) =>
util.format(GRAPH_LINK, source, target)
const nextStr = Object.entries(next).map(([id, neighbors]) =>
Array.from(neighbors)
.map(neighbor => util.format(GRAPH_LINK, id, neighbor))
.join('\n')
).join('\n');
return (
'digraph epenser {\n'
+ nodes
+ '\n'
+ links
+ '\n}'
);
return `digraph youtube {
${nodesStr}
${nextStr}
}`;
};

View File

@ -1,28 +1,26 @@
import Graph from 'graph-data-structure';
import fs from 'fs';
import path from 'path';
import util from 'util';
import * as api from './api.mjs';
import {graphToDOT} from './graph.mjs';
const YOUTUBE_WATCH = 'https://youtu.be/%s';
// Fetch the output path from command line
if (process.argv.length !== 3)
if (process.argv.length !== 4)
{
console.error(`Usage: node explore [output]`);
console.error(`Usage: ${process.argv[1]} ROOT DEST
Explore videos linked from ROOT and write the resulting graph to DEST.`);
process.exit(1);
}
const dest = process.argv[2];
// Graph of visited videos. Each node is a video which is linked to all the
// videos to which there is a link, either through a card or an endscreen item
const videosGraph = Graph();
const root = process.argv[2];
const dest = process.argv[3];
// Store metadata about each visited video
const videosMeta = Object.create(null);
const videosNodes = Object.create(null);
// For each video, the set of videos it links to, either through a card or an
// endscreen item
const nextVideos = Object.create(null);
/**
* Recursively explore a video and the videos linked from it to fill
@ -33,40 +31,33 @@ const videosMeta = Object.create(null);
const exploreVideo = async videoId =>
{
// Make sure we don't explore the same video twice
if (videoId in videosMeta)
if (videoId in videosNodes)
{
return Promise.resolve();
}
const playerConfig = await api.getPlayerConfig(videoId);
videosMeta[videoId] = api.getVideoMeta(playerConfig);
videosNodes[videoId] = {};
const linkedVideos = [
...api.getEndScreenVideos(playerConfig),
...api.getCardVideos(playerConfig),
];
const playerConfig = await api.getPlayerConfig(videoId);
videosNodes[videoId] = api.getVideoMeta(playerConfig);
nextVideos[videoId] = new Set();
// Add links between this video and the linked ones
linkedVideos.forEach(id => videosGraph.addEdge(videoId, id));
api.getEndScreenVideos(playerConfig)
.forEach(nextId => nextVideos[videoId].add(nextId));
api.getCardVideos(playerConfig)
.forEach(nextId => nextVideos[videoId].add(nextId));
// Recurse on linked videos
return Promise.all(linkedVideos.map(id => exploreVideo(id)));
return Promise.all(
Array.from(nextVideos[videoId])
.map(id => exploreVideo(id))
);
};
// ID of the source video
const rootVideoId = 'EZGra6O8ClQ';
console.log('Starting to explore!');
exploreVideo(rootVideoId).then(() =>
exploreVideo(root).then(() =>
{
fs.writeFileSync(
dest,
graphToDOT(
videosGraph,
id => videosMeta[id].title,
id => util.format(YOUTUBE_WATCH, id)
)
);
console.log('Finished. Result in ' + dest);
fs.writeFileSync(dest, graphToDOT(videosNodes, nextVideos));
console.log(`Finished. Result in ${dest}`);
}).catch(console.error);

37
package-lock.json generated
View File

@ -5,9 +5,9 @@
"requires": true,
"dependencies": {
"ajv": {
"version": "6.12.3",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.3.tgz",
"integrity": "sha512-4K0cK3L1hsqk9xIb2z9vs/XU+PGJZ9PNpJRDS9YLzmNdX6jmVPfamLvTJr0aDAusnHyCHO6MjzlkAsgtqp9teA==",
"version": "6.12.6",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
"integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==",
"requires": {
"fast-deep-equal": "^3.1.1",
"fast-json-stable-stringify": "^2.0.0",
@ -39,9 +39,9 @@
"integrity": "sha1-tG6JCTSpWR8tL2+G1+ap8bP+dqg="
},
"aws4": {
"version": "1.10.0",
"resolved": "https://registry.npmjs.org/aws4/-/aws4-1.10.0.tgz",
"integrity": "sha512-3YDiu347mtVtjpyV3u5kVqQLP242c06zwDOgpeRnybmXlYYsLbtTrUBUm8i8srONt+FWobl5aibnU1030PeeuA=="
"version": "1.11.0",
"resolved": "https://registry.npmjs.org/aws4/-/aws4-1.11.0.tgz",
"integrity": "sha512-xh1Rl34h6Fi1DC2WWKfxUTVqRsNnr6LsKz2+hfwDxQJWmrx8+c7ylaqBMcHfl1U1r2dsifOvKX3LQuLNZ+XSvA=="
},
"bcrypt-pbkdf": {
"version": "1.0.2",
@ -134,22 +134,17 @@
"assert-plus": "^1.0.0"
}
},
"graph-data-structure": {
"version": "1.12.1",
"resolved": "https://registry.npmjs.org/graph-data-structure/-/graph-data-structure-1.12.1.tgz",
"integrity": "sha512-0DHxFEUk2EHO19PQrcOckz91WZPk7Itl2mmNpGdpSIZUtBUHRVJPtuuZCJAXB69YRL4fKDCRv2cX3ly8aZZ0QQ=="
},
"har-schema": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/har-schema/-/har-schema-2.0.0.tgz",
"integrity": "sha1-qUwiJOvKwEeCoNkDVSHyRzW37JI="
},
"har-validator": {
"version": "5.1.3",
"resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.1.3.tgz",
"integrity": "sha512-sNvOCzEQNr/qrvJgc3UG/kD4QtlHycrzwS+6mfTrrSq97BvaYcPZZI1ZSqGSPR73Cxn4LKTD4PttRwfU7jWq5g==",
"version": "5.1.5",
"resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.1.5.tgz",
"integrity": "sha512-nmT2T0lljbxdQZfspsno9hgrG3Uir6Ks5afism62poxqBM6sDnMEuPmzTq8XN0OEwqKLLdh1jQI3qyE66Nzb3w==",
"requires": {
"ajv": "^6.5.5",
"ajv": "^6.12.3",
"har-schema": "^2.0.0"
}
},
@ -270,9 +265,9 @@
}
},
"safe-buffer": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
"integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ=="
},
"safer-buffer": {
"version": "2.1.2",
@ -318,9 +313,9 @@
"integrity": "sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q="
},
"uri-js": {
"version": "4.2.2",
"resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.2.2.tgz",
"integrity": "sha512-KY9Frmirql91X2Qgjry0Wd4Y+YTdrdZheS8TFwvkbLWf/G5KNJDCh6pKL5OZctEW4+0Baa5idK2ZQuELRwPznQ==",
"version": "4.4.0",
"resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.0.tgz",
"integrity": "sha512-B0yRTzYdUCCn9n+F4+Gh4yIDtMQcaJsmYBDsTSG8g/OejKBodLQ2IHfN3bM7jUsRXndopT7OIXWdYqc1fjmV6g==",
"requires": {
"punycode": "^2.1.0"
}

View File

@ -5,10 +5,10 @@
"main": "src/index.js",
"license": "CC0",
"scripts": {
"build": "mkdir -p build && node explore/index.mjs build/maze.dot && dot -Tsvg build/maze.dot -o build/maze.svg"
"epenser": "mkdir -p build && node explore/index.mjs EZGra6O8ClQ build/epenser.dot && dot -Tsvg build/epenser.dot -o build/epenser.svg",
"defakator": "mkdir -p build && node explore/index.mjs XM1ssJ8yxdg build/defakator.dot && dot -Tsvg build/defakator.dot -o build/defakator.svg"
},
"dependencies": {
"graph-data-structure": "^1.12.1",
"request": "^2.88.2"
}
}