From e7d87e3bbd6f22ac6e59a4a858c9b21ed59d852c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matt=C3=A9o=20Delabre?= Date: Wed, 15 Jul 2020 19:14:31 +0200 Subject: [PATCH] Update to new YouTube API --- explore/api.js | 145 +++++++++++++++++++++++----------------------- explore/graph.js | 16 ++--- explore/index.js | 65 ++++++++------------- package-lock.json | 91 ++++++++++++++--------------- package.json | 4 +- 5 files changed, 150 insertions(+), 171 deletions(-) diff --git a/explore/api.js b/explore/api.js index 588dbe7..be3e997 100644 --- a/explore/api.js +++ b/explore/api.js @@ -5,20 +5,24 @@ const request = require('request'); const cheerio = require('cheerio'); const YOUTUBE_BASE = 'https://www.youtube.com/%s'; +const WATCH_BASE = util.format(YOUTUBE_BASE, 'watch?v=%s'); + const VIDEO_ID_REGEX = /watch\?v=([^&]*)/i; -const END_SCREEN_BASE = util.format(YOUTUBE_BASE, 'get_endscreen?v=%s'); -const CARD_BASE = util.format(YOUTUBE_BASE, 'annotations_invideo?video_id=%s'); +// const END_SCREEN_BASE = util.format(YOUTUBE_BASE, 'get_endscreen?v=%s'); +// const CARD_BASE = util.format(YOUTUBE_BASE, 'annotations_invideo?video_id=%s'); + +const playerRegex = /ytplayer\.config = (\{.*?\});/; /** - * Récupère une liste des vidéos liées à la fin d’une autre vidéo. + * Fetch the `ytplayer.config` object for a YouTube video. * - * @param videoId Identifiant de la vidéo source. - * @return Liste des vidéos liées. Chaque vidéo est un objet contenant - * son identifiant (videoId) et son titre (title). + * @async + * @param videoId Identifier of the video to fetch. + * @return The player configuration object. */ -const getEndScreenVideos = videoId => +const getPlayerConfig = videoId => { - const url = util.format(END_SCREEN_BASE, videoId); + const url = util.format(WATCH_BASE, videoId); return new Promise((resolve, reject) => { @@ -30,87 +34,84 @@ const getEndScreenVideos = videoId => return; } - // Suppression des caractères initiaux inutiles si applicable - if (body.substr(0, 3) === ')]}') + // Look for the definition of ytplayer.config and unserialize it + // and the player_response subobject + try { - body = body.substr(3); + const playerConfig = JSON.parse(body.match(playerRegex)[1]); + playerConfig.args.player_response + = JSON.parse(playerConfig.args.player_response); + resolve(playerConfig); } - - // Interprétation du JSON - const data = JSON.parse(body); - - // Aucun écran de fin - if ( - typeof data !== 'object' || data === null || - data.elements === undefined - ) + catch (err) { - resolve([]); - return; + reject(err); } - - // Filtrage des données pour extraire le titre et l’ID des vidéos - resolve(data.elements.map(elt => - { - const data = elt.endscreenElementRenderer; - const videoIdResults = VIDEO_ID_REGEX.exec( - data.endpoint.urlEndpoint.url - ); - - return { - videoId: videoIdResults ? videoIdResults[1] : null, - title: data.title.simpleText, - }; - })); }); }); }; +exports.getPlayerConfig = getPlayerConfig; + +/** + * Get metadata about a YouTube video. + * + * @param playerConfig The `ytplayer.config` object corresponding to the source + * YouTube video, as obtained from `getPlayerConfig`. + * @return Object containing the video metadata. + */ +const getVideoMeta = playerConfig => ({ + videoId: playerConfig.args.player_response.videoDetails.videoId, + title: playerConfig.args.player_response.videoDetails.title, +}); + +exports.getVideoMeta = getVideoMeta; + +/** + * Find videos linked from the endscreen of a YouTube video. + * + * @param playerConfig The `ytplayer.config` object corresponding to the source + * YouTube video, as obtained from `getPlayerConfig`. + * @return List of identifiers of linked videos. + */ +const getEndScreenVideos = playerConfig => +{ + const response = playerConfig.args.player_response; + + if (!('endscreen' in response)) + { + return []; + } + + return response.endscreen.endscreenRenderer.elements + .map(elt => elt.endscreenElementRenderer) + .filter(rdr => 'watchEndpoint' in rdr.endpoint) + .map(rdr => rdr.endpoint.watchEndpoint.videoId); +}; + exports.getEndScreenVideos = getEndScreenVideos; /** - * Récupère une liste des vidéos liées en tant que carte d’une autre vidéo. + * Find videos linked from as cards from a YouTube video. * - * @param videoId Identifiant de la vidéo source. - * @return Liste des vidéos liées. Chaque vidéo est un objet contenant - * son identifiant (videoId) et son titre (title). + * @param playerConfig The `ytplayer.config` object corresponding to the source + * YouTube video, as obtained from `getPlayerConfig`. + * @return List of identifiers of linked videos. */ -const getCardVideos = videoId => +const getCardVideos = playerConfig => { - const url = util.format(CARD_BASE, videoId); + const response = playerConfig.args.player_response; - return new Promise((resolve, reject) => + if (!('cards' in response)) { - request(url, (err, res, body) => - { - if (err) - { - reject(err); - return; - } + return []; + } - // Interprétation du XML externe et recherche des annotations - // de type carte - const nav = cheerio.load(body); - const cards = nav('annotation[type="card"][style="video"]'); - const list = []; - - cards.each((i, el) => - { - // Interprétation du JSON de chaque carte, extraction - // du titre et de l’identifiant de la vidéo - const data = JSON.parse(nav(el).children('data').text()); - const videoIdResults = VIDEO_ID_REGEX.exec(data.url); - - list.push({ - title: data.title, - videoId: videoIdResults ? videoIdResults[1] : null - }); - }); - - resolve(list); - }); - }); + return response.cards.cardCollectionRenderer.cards + .map(card => card.cardRenderer.content) + .filter(content => 'videoInfoCardContentRenderer' in content) + .map(content => content.videoInfoCardContentRenderer) + .map(rdr => rdr.action.watchEndpoint.videoId); }; exports.getCardVideos = getCardVideos; diff --git a/explore/graph.js b/explore/graph.js index 5ef738b..7719941 100644 --- a/explore/graph.js +++ b/explore/graph.js @@ -7,19 +7,19 @@ const GRAPH_NODE_URL = ' "%s" [label="%s", URL="%s", fontcolor=blue]'; const GRAPH_LINK = ' "%s" -> "%s"'; /** - * Convertit un graphe en format DOT. + * Convert a graph to the DOT format. * - * @param graph Graphe à convertir. - * @param [title=identity] Fonction donnant le titre de chaque nœud du graphe. - * @param [url=none] Fonction donnant l’URL de chaque nœud du graphe, ou la - * chaîne vide si un nœud n’a pas d’URL. - * @return Représentation DOT du graphe. + * @param graph Graph to convert. + * @param [title=identity] Function giving the name of each node. + * @param [url=none] Function given the URL of each node, or an empty string + * if a node has no URL. + * @return DOT representation of the graph. */ const graphToDOT = (graph, title = id => id, url = () => '') => { const ser = graph.serialize(); - // Conversion des nœuds de vidéo + // Convert nodes const nodes = ser.nodes.map(({id}) => { const nodeTitle = title(id); @@ -36,7 +36,7 @@ const graphToDOT = (graph, title = id => id, url = () => '') => } ).join('\n'); - // Conversion des liens de vidéo + // Convert edges const links = ser.links.map(({source, target}) => util.format(GRAPH_LINK, source, target) ).join('\n'); diff --git a/explore/index.js b/explore/index.js index 0c9eb0f..bea05a0 100644 --- a/explore/index.js +++ b/explore/index.js @@ -5,74 +5,59 @@ const fs = require('fs'); const path = require('path'); const util = require('util'); -const {getEndScreenVideos, getCardVideos} = require('./api'); +const api = require('./api'); const {graphToDOT} = require('./graph'); const YOUTUBE_WATCH = 'https://youtu.be/%s'; -// Récupère le chemin vers le fichier de sortie depuis la ligne de commande +// Fetch the output path from command line if (process.argv.length !== 3) { - console.error(`Utilisation : node src [sortie]`); + console.error(`Usage: node explore [output]`); process.exit(1); } const dest = process.argv[2]; -// Graphe des vidéos. Chaque nœud est une vidéo, et est liée à toutes -// les vidéos vers lesquelles elle a un lien (par carte ou écran de fin) +// Graph of visited videos. Each node is a video which is linked to all the +// videos to which there is a link, either through a card or an endscreen item const videosGraph = Graph(); -// Stocke les métadonnées de chaque vidéo +// Store metadata about each visited video const videosMeta = Object.create(null); -// Se souvient des vidéos déja visitées -const visitedVideos = Object.create(null); - /** - * Explore récursivement les liens d’une vidéo pour remplir le graphe - * des vidéos. + * Recursively explore a video and the video linked from it to fill + * the video graph. * - * @param videoId Identifiant de la vidéo source. + * @param videoId Source video identifier. */ -const exploreVideo = videoId => +const exploreVideo = async videoId => { - // S’assure de ne pas visiter deux fois la même vidéo - if (visitedVideos[videoId] === true) + // Make sure we don’t explore the same video twice + if (videoId in videosMeta) { return Promise.resolve(); } - visitedVideos[videoId] = true; + const playerConfig = await api.getPlayerConfig(videoId); + videosMeta[videoId] = api.getVideoMeta(playerConfig); - return Promise.all([ - getEndScreenVideos(videoId), - getCardVideos(videoId) - ]).then(results => - { - const videos = [].concat(...results); - const ids = videos.map(video => video.videoId); + const linkedVideos = [ + ...api.getEndScreenVideos(playerConfig), + ...api.getCardVideos(playerConfig), + ]; - // Ajout des vidéos liées dans les métadonnées - videos.forEach(video => videosMeta[video.videoId] = video); + // Add links between this video and the linked ones + linkedVideos.forEach(id => videosGraph.addEdge(videoId, id)); - // Ajout des liens entre la vidéo et ses vidéos liées - ids.forEach(id => videosGraph.addEdge(videoId, id)); - - // Récursion sur les vidéos non-explorées - return Promise.all(ids.map(id => exploreVideo(id))); - }); + // Recurse on linked videos + return Promise.all(linkedVideos.map(id => exploreVideo(id))); }; -// Métadonnées de la vidéo source +// Metadata of the source video const rootVideoId = 'EZGra6O8ClQ'; -videosMeta[rootVideoId] = { - title: '1 avril 2017 : présentation des règles', - videoId: rootVideoId, - links: [] -}; - -console.log('Démarrage de l’exploration !'); +console.log('Starting to explore!'); exploreVideo(rootVideoId).then(() => { @@ -85,5 +70,5 @@ exploreVideo(rootVideoId).then(() => ) ); - console.log('Terminé. Résultat dans ' + dest); + console.log('Finished. Result in ' + dest); }).catch(console.error); diff --git a/package-lock.json b/package-lock.json index 5503707..41b3343 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,11 +10,11 @@ "integrity": "sha512-Uy0PN4R5vgBUXFoJrKryf5aTk3kJ8Rv3PdlHjl6UaX+Cqp1QE0yPQ68MPXGrZOfG7gZVNDIJZYyot0B9ubXUrQ==" }, "ajv": { - "version": "6.10.1", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.10.1.tgz", - "integrity": "sha512-w1YQaVGNC6t2UCPjEawK/vo/dG8OOrVtUmhBT1uJJYxbl5kU2Tj3v6LGqBcsysN1yhuCStJCCA3GqdvKY8sqXQ==", + "version": "6.12.3", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.3.tgz", + "integrity": "sha512-4K0cK3L1hsqk9xIb2z9vs/XU+PGJZ9PNpJRDS9YLzmNdX6jmVPfamLvTJr0aDAusnHyCHO6MjzlkAsgtqp9teA==", "requires": { - "fast-deep-equal": "^2.0.1", + "fast-deep-equal": "^3.1.1", "fast-json-stable-stringify": "^2.0.0", "json-schema-traverse": "^0.4.1", "uri-js": "^4.2.2" @@ -44,9 +44,9 @@ "integrity": "sha1-tG6JCTSpWR8tL2+G1+ap8bP+dqg=" }, "aws4": { - "version": "1.8.0", - "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.8.0.tgz", - "integrity": "sha512-ReZxvNHIOv88FlT7rxcXIIC0fPt4KZqZbOlivyWtXLt8ESx84zd3kMC6iK5jVeS2qt+g7ftS7ye4fi06X5rtRQ==" + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.10.0.tgz", + "integrity": "sha512-3YDiu347mtVtjpyV3u5kVqQLP242c06zwDOgpeRnybmXlYYsLbtTrUBUm8i8srONt+FWobl5aibnU1030PeeuA==" }, "bcrypt-pbkdf": { "version": "1.0.2", @@ -177,14 +177,14 @@ "integrity": "sha1-lpGEQOMEGnpBT4xS48V06zw+HgU=" }, "fast-deep-equal": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz", - "integrity": "sha1-ewUhjd+WZ79/Nwv3/bLLFf3Qqkk=" + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==" }, "fast-json-stable-stringify": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.0.0.tgz", - "integrity": "sha1-1RQsDK7msRifh9OnYREGT4bIu/I=" + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==" }, "forever-agent": { "version": "0.6.1", @@ -210,9 +210,9 @@ } }, "graph-data-structure": { - "version": "1.8.0", - "resolved": "https://registry.npmjs.org/graph-data-structure/-/graph-data-structure-1.8.0.tgz", - "integrity": "sha512-G2Jl7JLGsq0FGBvFMfKkQEwjnk+1//ssIirR5GTQxXBbKqYZkmjDFEVcJ8H87dVpw4D8lZDJj9v9ggnHWW8M+g==" + "version": "1.12.1", + "resolved": "https://registry.npmjs.org/graph-data-structure/-/graph-data-structure-1.12.1.tgz", + "integrity": "sha512-0DHxFEUk2EHO19PQrcOckz91WZPk7Itl2mmNpGdpSIZUtBUHRVJPtuuZCJAXB69YRL4fKDCRv2cX3ly8aZZ0QQ==" }, "har-schema": { "version": "2.0.0", @@ -298,21 +298,21 @@ } }, "lodash": { - "version": "4.17.11", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.11.tgz", - "integrity": "sha512-cQKh8igo5QUhZ7lg38DYWAxMvjSAKG0A8wGSVimP07SIUEK2UO+arSRKbRZWtelMtN5V0Hkwh5ryOto/SshYIg==" + "version": "4.17.19", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.19.tgz", + "integrity": "sha512-JNvd8XER9GQX0v2qJgsaN/mzFCNA5BRe/j8JN9d+tWyGLSodKQHKFicdwNYzWwI3wjRnaKPsGj1XkBjx/F96DQ==" }, "mime-db": { - "version": "1.40.0", - "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.40.0.tgz", - "integrity": "sha512-jYdeOMPy9vnxEqFRRo6ZvTZ8d9oPb+k18PKoYNYUe2stVEBPPwsln/qWzdbmaIvnhZ9v2P+CuecK+fpUfsV2mA==" + "version": "1.44.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.44.0.tgz", + "integrity": "sha512-/NOTfLrsPBVeH7YtFPgsVWveuL+4SjjYxaQ1xtM1KMFj7HdxlBlxeyNLzhyJVx7r4rZGJAZ/6lkKCitSc/Nmpg==" }, "mime-types": { - "version": "2.1.24", - "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.24.tgz", - "integrity": "sha512-WaFHS3MCl5fapm3oLxU4eYDw77IQM2ACcxQ9RIxfaC3ooc6PFuBMGZZsYpvoXS5D5QTWPieo1jjLdAm3TBP3cQ==", + "version": "2.1.27", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.27.tgz", + "integrity": "sha512-JIhqnCasI9yD+SsmkquHBxTSEuZdQX5BuQnS2Vc7puQQQ+8yiP5AY5uWhpdv4YL4VM5c6iliiYWPgJ/nJQLp7w==", "requires": { - "mime-db": "1.40.0" + "mime-db": "1.44.0" } }, "nth-check": { @@ -342,9 +342,9 @@ "integrity": "sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns=" }, "psl": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/psl/-/psl-1.2.0.tgz", - "integrity": "sha512-GEn74ZffufCmkDDLNcl3uuyF/aSD6exEyh1v/ZSdAomB82t6G9hzJVRx0jBmLDW+VfZqks3aScmMw9DszwUalA==" + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/psl/-/psl-1.8.0.tgz", + "integrity": "sha512-RIdOzyoavK+hA18OGGWDqUTsCLhtA7IcZ/6NCs4fFJaHBDab+pDDmDIByWFRQJq2Cd7r1OoQxBGKOaztq+hjIQ==" }, "punycode": { "version": "2.1.1", @@ -367,9 +367,9 @@ } }, "request": { - "version": "2.88.0", - "resolved": "https://registry.npmjs.org/request/-/request-2.88.0.tgz", - "integrity": "sha512-NAqBSrijGLZdM0WZNsInLJpkJokL72XYjUpnB0iwsRgxh7dB6COrHnTBNwN0E+lHDAJzu7kLAkDeY08z2/A0hg==", + "version": "2.88.2", + "resolved": "https://registry.npmjs.org/request/-/request-2.88.2.tgz", + "integrity": "sha512-MsvtOrfG9ZcrOwAW+Qi+F6HbD0CWXEh9ou77uOb7FM2WPhwT7smM833PzanhJLsgXjN89Ir6V2PczXNnMpwKhw==", "requires": { "aws-sign2": "~0.7.0", "aws4": "^1.8.0", @@ -378,7 +378,7 @@ "extend": "~3.0.2", "forever-agent": "~0.6.1", "form-data": "~2.3.2", - "har-validator": "~5.1.0", + "har-validator": "~5.1.3", "http-signature": "~1.2.0", "is-typedarray": "~1.0.0", "isstream": "~0.1.2", @@ -388,7 +388,7 @@ "performance-now": "^2.1.0", "qs": "~6.5.2", "safe-buffer": "^5.1.2", - "tough-cookie": "~2.4.3", + "tough-cookie": "~2.5.0", "tunnel-agent": "^0.6.0", "uuid": "^3.3.2" } @@ -428,19 +428,12 @@ } }, "tough-cookie": { - "version": "2.4.3", - "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.4.3.tgz", - "integrity": "sha512-Q5srk/4vDM54WJsJio3XNn6K2sCG+CQ8G5Wz6bZhRZoAe/+TxjWB/GlFAnYEbkYVlON9FMk/fE3h2RLpPXo4lQ==", + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.5.0.tgz", + "integrity": "sha512-nlLsUzgm1kfLXSXfRZMc1KLAugd4hqJHDTvc2hDIwS3mZAfMEuMbc03SujMF+GEcpaX/qboeycw6iO8JwVv2+g==", "requires": { - "psl": "^1.1.24", - "punycode": "^1.4.1" - }, - "dependencies": { - "punycode": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz", - "integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4=" - } + "psl": "^1.1.28", + "punycode": "^2.1.1" } }, "tunnel-agent": { @@ -470,9 +463,9 @@ "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=" }, "uuid": { - "version": "3.3.2", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.3.2.tgz", - "integrity": "sha512-yXJmeNaw3DnnKAOKJE51sL/ZaYfWJRl1pK9dr19YFCu0ObS231AB1/LbqTKRAQ5kw8A90rA6fr4riOUpTZvQZA==" + "version": "3.4.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.4.0.tgz", + "integrity": "sha512-HjSDRw6gZE5JMggctHBcjVak08+KEVhSIiDzFnT9S9aegmp85S/bReBVTb4QTFaRNptJ9kuYaNhnbNEOkbKb/A==" }, "verror": { "version": "1.10.0", diff --git a/package.json b/package.json index 6c630bd..b5f660a 100644 --- a/package.json +++ b/package.json @@ -9,7 +9,7 @@ }, "dependencies": { "cheerio": "1.0.0-rc.3", - "graph-data-structure": "^1.8.0", - "request": "^2.88.0" + "graph-data-structure": "^1.12.1", + "request": "^2.88.2" } }