Browse Source

Update to new YouTube API

master
Mattéo Delabre 4 years ago
parent
commit
e7d87e3bbd
Signed by: matteo GPG Key ID: AE3FBD02DC583ABB
  1. 145
      explore/api.js
  2. 16
      explore/graph.js
  3. 65
      explore/index.js
  4. 91
      package-lock.json
  5. 4
      package.json

145
explore/api.js

@ -5,20 +5,24 @@ const request = require('request');
const cheerio = require('cheerio');
const YOUTUBE_BASE = 'https://www.youtube.com/%s';
const WATCH_BASE = util.format(YOUTUBE_BASE, 'watch?v=%s');
const VIDEO_ID_REGEX = /watch\?v=([^&]*)/i;
const END_SCREEN_BASE = util.format(YOUTUBE_BASE, 'get_endscreen?v=%s');
const CARD_BASE = util.format(YOUTUBE_BASE, 'annotations_invideo?video_id=%s');
// const END_SCREEN_BASE = util.format(YOUTUBE_BASE, 'get_endscreen?v=%s');
// const CARD_BASE = util.format(YOUTUBE_BASE, 'annotations_invideo?video_id=%s');
const playerRegex = /ytplayer\.config = (\{.*?\});/;
/**
* Récupère une liste des vidéos liées à la fin d'une autre vidéo.
* Fetch the `ytplayer.config` object for a YouTube video.
*
* @param videoId Identifiant de la vidéo source.
* @return Liste des vidéos liées. Chaque vidéo est un objet contenant
* son identifiant (videoId) et son titre (title).
* @async
* @param videoId Identifier of the video to fetch.
* @return The player configuration object.
*/
const getEndScreenVideos = videoId =>
const getPlayerConfig = videoId =>
{
const url = util.format(END_SCREEN_BASE, videoId);
const url = util.format(WATCH_BASE, videoId);
return new Promise((resolve, reject) =>
{
@ -30,87 +34,84 @@ const getEndScreenVideos = videoId =>
return;
}
// Suppression des caractères initiaux inutiles si applicable
if (body.substr(0, 3) === ')]}')
// Look for the definition of ytplayer.config and unserialize it
// and the player_response subobject
try
{
body = body.substr(3);
const playerConfig = JSON.parse(body.match(playerRegex)[1]);
playerConfig.args.player_response
= JSON.parse(playerConfig.args.player_response);
resolve(playerConfig);
}
// Interprétation du JSON
const data = JSON.parse(body);
// Aucun écran de fin
if (
typeof data !== 'object' || data === null ||
data.elements === undefined
)
catch (err)
{
resolve([]);
return;
reject(err);
}
// Filtrage des données pour extraire le titre et l’ID des vidéos
resolve(data.elements.map(elt =>
{
const data = elt.endscreenElementRenderer;
const videoIdResults = VIDEO_ID_REGEX.exec(
data.endpoint.urlEndpoint.url
);
return {
videoId: videoIdResults ? videoIdResults[1] : null,
title: data.title.simpleText,
};
}));
});
});
};
exports.getEndScreenVideos = getEndScreenVideos;
exports.getPlayerConfig = getPlayerConfig;
/**
* Récupère une liste des vidéos liées en tant que carte d'une autre vidéo.
* Get metadata about a YouTube video.
*
* @param videoId Identifiant de la vidéo source.
* @return Liste des vidéos liées. Chaque vidéo est un objet contenant
* son identifiant (videoId) et son titre (title).
* @param playerConfig The `ytplayer.config` object corresponding to the source
* YouTube video, as obtained from `getPlayerConfig`.
* @return Object containing the video metadata.
*/
const getCardVideos = videoId =>
const getVideoMeta = playerConfig =>
{
    // Only the identifier and the title are kept from the video details
    const {videoId, title} = playerConfig.args.player_response.videoDetails;

    return {videoId, title};
};
exports.getVideoMeta = getVideoMeta;
/**
* Find videos linked from the endscreen of a YouTube video.
*
* @param playerConfig The `ytplayer.config` object corresponding to the source
* YouTube video, as obtained from `getPlayerConfig`.
* @return List of identifiers of linked videos.
*/
const getEndScreenVideos = playerConfig =>
{
const url = util.format(CARD_BASE, videoId);
const response = playerConfig.args.player_response;
return new Promise((resolve, reject) =>
if (!('endscreen' in response))
{
request(url, (err, res, body) =>
{
if (err)
{
reject(err);
return;
}
return [];
}
// Interprétation du XML externe et recherche des annotations
// de type carte
const nav = cheerio.load(body);
const cards = nav('annotation[type="card"][style="video"]');
const list = [];
return response.endscreen.endscreenRenderer.elements
.map(elt => elt.endscreenElementRenderer)
.filter(rdr => 'watchEndpoint' in rdr.endpoint)
.map(rdr => rdr.endpoint.watchEndpoint.videoId);
};
cards.each((i, el) =>
{
// Interprétation du JSON de chaque carte, extraction
// du titre et de l’identifiant de la vidéo
const data = JSON.parse(nav(el).children('data').text());
const videoIdResults = VIDEO_ID_REGEX.exec(data.url);
list.push({
title: data.title,
videoId: videoIdResults ? videoIdResults[1] : null
});
});
resolve(list);
});
});
exports.getEndScreenVideos = getEndScreenVideos;
/**
 * Collect the videos that a YouTube video links to through its cards.
 *
 * @param playerConfig The `ytplayer.config` object of the source YouTube
 * video, as obtained from `getPlayerConfig`.
 * @return List of identifiers of the linked videos; empty when the player
 * response has no `cards` entry.
 */
const getCardVideos = playerConfig =>
{
    const {player_response: playerResponse} = playerConfig.args;

    if ('cards' in playerResponse)
    {
        const contents = playerResponse.cards.cardCollectionRenderer.cards
            .map(({cardRenderer}) => cardRenderer.content);

        // Only cards carrying a video renderer are kept; other kinds are skipped
        const videoContents = contents.filter(
            content => 'videoInfoCardContentRenderer' in content
        );

        return videoContents.map(content =>
            content.videoInfoCardContentRenderer.action.watchEndpoint.videoId
        );
    }

    return [];
};
exports.getCardVideos = getCardVideos;

16
explore/graph.js

@ -7,19 +7,19 @@ const GRAPH_NODE_URL = ' "%s" [label="%s", URL="%s", fontcolor=blue]';
const GRAPH_LINK = ' "%s" -> "%s"';
/**
* Convertit un graphe en format DOT.
* Convert a graph to the DOT format.
*
* @param graph Graphe à convertir.
* @param [title=identity] Fonction donnant le titre de chaque nœud du graphe.
* @param [url=none] Fonction donnant l'URL de chaque nœud du graphe, ou la
* chaîne vide si un nœud n'a pas d'URL.
* @return Représentation DOT du graphe.
* @param graph Graph to convert.
* @param [title=identity] Function giving the name of each node.
* @param [url=none] Function giving the URL of each node, or an empty string
* if a node has no URL.
* @return DOT representation of the graph.
*/
const graphToDOT = (graph, title = id => id, url = () => '') =>
{
const ser = graph.serialize();
// Conversion des nœuds de vidéo
// Convert nodes
const nodes = ser.nodes.map(({id}) =>
{
const nodeTitle = title(id);
@ -36,7 +36,7 @@ const graphToDOT = (graph, title = id => id, url = () => '') =>
}
).join('\n');
// Conversion des liens de vidéo
// Convert edges
const links = ser.links.map(({source, target}) =>
util.format(GRAPH_LINK, source, target)
).join('\n');

65
explore/index.js

@ -5,74 +5,59 @@ const fs = require('fs');
const path = require('path');
const util = require('util');
const {getEndScreenVideos, getCardVideos} = require('./api');
const api = require('./api');
const {graphToDOT} = require('./graph');
const YOUTUBE_WATCH = 'https://youtu.be/%s';
// Récupère le chemin vers le fichier de sortie depuis la ligne de commande
// Fetch the output path from command line
if (process.argv.length !== 3)
{
console.error(`Utilisation : node src [sortie]`);
console.error(`Usage: node explore [output]`);
process.exit(1);
}
const dest = process.argv[2];
// Graphe des vidéos. Chaque nœud est une vidéo, et est liée à toutes
// les vidéos vers lesquelles elle a un lien (par carte ou écran de fin)
// Graph of visited videos. Each node is a video which is linked to all the
// videos to which there is a link, either through a card or an endscreen item
const videosGraph = Graph();
// Stocke les métadonnées de chaque vidéo
// Store metadata about each visited video
const videosMeta = Object.create(null);
// Se souvient des vidéos déja visitées
const visitedVideos = Object.create(null);
/**
* Explore récursivement les liens d'une vidéo pour remplir le graphe
* des vidéos.
* Recursively explore a video and the videos linked from it to fill
* the video graph.
*
* @param videoId Identifiant de la vidéo source.
* @param videoId Source video identifier.
*/
const exploreVideo = videoId =>
const exploreVideo = async videoId =>
{
// S’assure de ne pas visiter deux fois la même vidéo
if (visitedVideos[videoId] === true)
// Make sure we don’t explore the same video twice
if (videoId in videosMeta)
{
return Promise.resolve();
}
visitedVideos[videoId] = true;
return Promise.all([
getEndScreenVideos(videoId),
getCardVideos(videoId)
]).then(results =>
{
const videos = [].concat(...results);
const ids = videos.map(video => video.videoId);
const playerConfig = await api.getPlayerConfig(videoId);
videosMeta[videoId] = api.getVideoMeta(playerConfig);
// Ajout des vidéos liées dans les métadonnées
videos.forEach(video => videosMeta[video.videoId] = video);
const linkedVideos = [
...api.getEndScreenVideos(playerConfig),
...api.getCardVideos(playerConfig),
];
// Ajout des liens entre la vidéo et ses vidéos liées
ids.forEach(id => videosGraph.addEdge(videoId, id));
// Add links between this video and the linked ones
linkedVideos.forEach(id => videosGraph.addEdge(videoId, id));
// Récursion sur les vidéos non-explorées
return Promise.all(ids.map(id => exploreVideo(id)));
});
// Recurse on linked videos
return Promise.all(linkedVideos.map(id => exploreVideo(id)));
};
// Métadonnées de la vidéo source
// Metadata of the source video
const rootVideoId = 'EZGra6O8ClQ';
videosMeta[rootVideoId] = {
title: '1 avril 2017 : présentation des règles',
videoId: rootVideoId,
links: []
};
console.log('Démarrage de l’exploration !');
console.log('Starting to explore!');
exploreVideo(rootVideoId).then(() =>
{
@ -85,5 +70,5 @@ exploreVideo(rootVideoId).then(() =>
)
);
console.log('Terminé. Résultat dans ' + dest);
console.log('Finished. Result in ' + dest);
}).catch(console.error);

91
package-lock.json

@ -10,11 +10,11 @@
"integrity": "sha512-Uy0PN4R5vgBUXFoJrKryf5aTk3kJ8Rv3PdlHjl6UaX+Cqp1QE0yPQ68MPXGrZOfG7gZVNDIJZYyot0B9ubXUrQ=="
},
"ajv": {
"version": "6.10.1",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.10.1.tgz",
"integrity": "sha512-w1YQaVGNC6t2UCPjEawK/vo/dG8OOrVtUmhBT1uJJYxbl5kU2Tj3v6LGqBcsysN1yhuCStJCCA3GqdvKY8sqXQ==",
"version": "6.12.3",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.3.tgz",
"integrity": "sha512-4K0cK3L1hsqk9xIb2z9vs/XU+PGJZ9PNpJRDS9YLzmNdX6jmVPfamLvTJr0aDAusnHyCHO6MjzlkAsgtqp9teA==",
"requires": {
"fast-deep-equal": "^2.0.1",
"fast-deep-equal": "^3.1.1",
"fast-json-stable-stringify": "^2.0.0",
"json-schema-traverse": "^0.4.1",
"uri-js": "^4.2.2"
@ -44,9 +44,9 @@
"integrity": "sha1-tG6JCTSpWR8tL2+G1+ap8bP+dqg="
},
"aws4": {
"version": "1.8.0",
"resolved": "https://registry.npmjs.org/aws4/-/aws4-1.8.0.tgz",
"integrity": "sha512-ReZxvNHIOv88FlT7rxcXIIC0fPt4KZqZbOlivyWtXLt8ESx84zd3kMC6iK5jVeS2qt+g7ftS7ye4fi06X5rtRQ=="
"version": "1.10.0",
"resolved": "https://registry.npmjs.org/aws4/-/aws4-1.10.0.tgz",
"integrity": "sha512-3YDiu347mtVtjpyV3u5kVqQLP242c06zwDOgpeRnybmXlYYsLbtTrUBUm8i8srONt+FWobl5aibnU1030PeeuA=="
},
"bcrypt-pbkdf": {
"version": "1.0.2",
@ -177,14 +177,14 @@
"integrity": "sha1-lpGEQOMEGnpBT4xS48V06zw+HgU="
},
"fast-deep-equal": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz",
"integrity": "sha1-ewUhjd+WZ79/Nwv3/bLLFf3Qqkk="
"version": "3.1.3",
"resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz",
"integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="
},
"fast-json-stable-stringify": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.0.0.tgz",
"integrity": "sha1-1RQsDK7msRifh9OnYREGT4bIu/I="
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz",
"integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw=="
},
"forever-agent": {
"version": "0.6.1",
@ -210,9 +210,9 @@
}
},
"graph-data-structure": {
"version": "1.8.0",
"resolved": "https://registry.npmjs.org/graph-data-structure/-/graph-data-structure-1.8.0.tgz",
"integrity": "sha512-G2Jl7JLGsq0FGBvFMfKkQEwjnk+1//ssIirR5GTQxXBbKqYZkmjDFEVcJ8H87dVpw4D8lZDJj9v9ggnHWW8M+g=="
"version": "1.12.1",
"resolved": "https://registry.npmjs.org/graph-data-structure/-/graph-data-structure-1.12.1.tgz",
"integrity": "sha512-0DHxFEUk2EHO19PQrcOckz91WZPk7Itl2mmNpGdpSIZUtBUHRVJPtuuZCJAXB69YRL4fKDCRv2cX3ly8aZZ0QQ=="
},
"har-schema": {
"version": "2.0.0",
@ -298,21 +298,21 @@
}
},
"lodash": {
"version": "4.17.11",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.11.tgz",
"integrity": "sha512-cQKh8igo5QUhZ7lg38DYWAxMvjSAKG0A8wGSVimP07SIUEK2UO+arSRKbRZWtelMtN5V0Hkwh5ryOto/SshYIg=="
"version": "4.17.19",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.19.tgz",
"integrity": "sha512-JNvd8XER9GQX0v2qJgsaN/mzFCNA5BRe/j8JN9d+tWyGLSodKQHKFicdwNYzWwI3wjRnaKPsGj1XkBjx/F96DQ=="
},
"mime-db": {
"version": "1.40.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.40.0.tgz",
"integrity": "sha512-jYdeOMPy9vnxEqFRRo6ZvTZ8d9oPb+k18PKoYNYUe2stVEBPPwsln/qWzdbmaIvnhZ9v2P+CuecK+fpUfsV2mA=="
"version": "1.44.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.44.0.tgz",
"integrity": "sha512-/NOTfLrsPBVeH7YtFPgsVWveuL+4SjjYxaQ1xtM1KMFj7HdxlBlxeyNLzhyJVx7r4rZGJAZ/6lkKCitSc/Nmpg=="
},
"mime-types": {
"version": "2.1.24",
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.24.tgz",
"integrity": "sha512-WaFHS3MCl5fapm3oLxU4eYDw77IQM2ACcxQ9RIxfaC3ooc6PFuBMGZZsYpvoXS5D5QTWPieo1jjLdAm3TBP3cQ==",
"version": "2.1.27",
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.27.tgz",
"integrity": "sha512-JIhqnCasI9yD+SsmkquHBxTSEuZdQX5BuQnS2Vc7puQQQ+8yiP5AY5uWhpdv4YL4VM5c6iliiYWPgJ/nJQLp7w==",
"requires": {
"mime-db": "1.40.0"
"mime-db": "1.44.0"
}
},
"nth-check": {
@ -342,9 +342,9 @@
"integrity": "sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns="
},
"psl": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/psl/-/psl-1.2.0.tgz",
"integrity": "sha512-GEn74ZffufCmkDDLNcl3uuyF/aSD6exEyh1v/ZSdAomB82t6G9hzJVRx0jBmLDW+VfZqks3aScmMw9DszwUalA=="
"version": "1.8.0",
"resolved": "https://registry.npmjs.org/psl/-/psl-1.8.0.tgz",
"integrity": "sha512-RIdOzyoavK+hA18OGGWDqUTsCLhtA7IcZ/6NCs4fFJaHBDab+pDDmDIByWFRQJq2Cd7r1OoQxBGKOaztq+hjIQ=="
},
"punycode": {
"version": "2.1.1",
@ -367,9 +367,9 @@
}
},
"request": {
"version": "2.88.0",
"resolved": "https://registry.npmjs.org/request/-/request-2.88.0.tgz",
"integrity": "sha512-NAqBSrijGLZdM0WZNsInLJpkJokL72XYjUpnB0iwsRgxh7dB6COrHnTBNwN0E+lHDAJzu7kLAkDeY08z2/A0hg==",
"version": "2.88.2",
"resolved": "https://registry.npmjs.org/request/-/request-2.88.2.tgz",
"integrity": "sha512-MsvtOrfG9ZcrOwAW+Qi+F6HbD0CWXEh9ou77uOb7FM2WPhwT7smM833PzanhJLsgXjN89Ir6V2PczXNnMpwKhw==",
"requires": {
"aws-sign2": "~0.7.0",
"aws4": "^1.8.0",
@ -378,7 +378,7 @@
"extend": "~3.0.2",
"forever-agent": "~0.6.1",
"form-data": "~2.3.2",
"har-validator": "~5.1.0",
"har-validator": "~5.1.3",
"http-signature": "~1.2.0",
"is-typedarray": "~1.0.0",
"isstream": "~0.1.2",
@ -388,7 +388,7 @@
"performance-now": "^2.1.0",
"qs": "~6.5.2",
"safe-buffer": "^5.1.2",
"tough-cookie": "~2.4.3",
"tough-cookie": "~2.5.0",
"tunnel-agent": "^0.6.0",
"uuid": "^3.3.2"
}
@ -428,19 +428,12 @@
}
},
"tough-cookie": {
"version": "2.4.3",
"resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.4.3.tgz",
"integrity": "sha512-Q5srk/4vDM54WJsJio3XNn6K2sCG+CQ8G5Wz6bZhRZoAe/+TxjWB/GlFAnYEbkYVlON9FMk/fE3h2RLpPXo4lQ==",
"version": "2.5.0",
"resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.5.0.tgz",
"integrity": "sha512-nlLsUzgm1kfLXSXfRZMc1KLAugd4hqJHDTvc2hDIwS3mZAfMEuMbc03SujMF+GEcpaX/qboeycw6iO8JwVv2+g==",
"requires": {
"psl": "^1.1.24",
"punycode": "^1.4.1"
},
"dependencies": {
"punycode": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz",
"integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4="
}
"psl": "^1.1.28",
"punycode": "^2.1.1"
}
},
"tunnel-agent": {
@ -470,9 +463,9 @@
"integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8="
},
"uuid": {
"version": "3.3.2",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-3.3.2.tgz",
"integrity": "sha512-yXJmeNaw3DnnKAOKJE51sL/ZaYfWJRl1pK9dr19YFCu0ObS231AB1/LbqTKRAQ5kw8A90rA6fr4riOUpTZvQZA=="
"version": "3.4.0",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-3.4.0.tgz",
"integrity": "sha512-HjSDRw6gZE5JMggctHBcjVak08+KEVhSIiDzFnT9S9aegmp85S/bReBVTb4QTFaRNptJ9kuYaNhnbNEOkbKb/A=="
},
"verror": {
"version": "1.10.0",

4
package.json

@ -9,7 +9,7 @@
},
"dependencies": {
"cheerio": "1.0.0-rc.3",
"graph-data-structure": "^1.8.0",
"request": "^2.88.0"
"graph-data-structure": "^1.12.1",
"request": "^2.88.2"
}
}

Loading…
Cancel
Save