diff --git a/info.js b/info.js new file mode 100644 index 0000000..a2dd09a --- /dev/null +++ b/info.js @@ -0,0 +1,484 @@ +const querystring = require('querystring'); +const sax = require('sax'); +const miniget = require('miniget'); +const utils = require('./utils'); +// Forces Node JS version of setTimeout for Electron based applications +const { setTimeout } = require('timers'); +const formatUtils = require('./format-utils'); +const urlUtils = require('./url-utils'); +const extras = require('./info-extras'); +const sig = require('./sig'); +const Cache = require('./cache'); + + +const BASE_URL = 'https://www.youtube.com/watch?v='; + + +// Cached for storing basic/full info. +exports.cache = new Cache(); +exports.cookieCache = new Cache(1000 * 60 * 60 * 24); +exports.watchPageCache = new Cache(); + + +// Special error class used to determine if an error is unrecoverable, +// as in, ytdl-core should not try again to fetch the video metadata. +// In this case, the video is usually unavailable in some way. +class UnrecoverableError extends Error {} + + +// List of URLs that show up in `notice_url` for age restricted videos. +const AGE_RESTRICTED_URLS = [ + 'support.google.com/youtube/?p=age_restrictions', + 'youtube.com/t/community_guidelines', +]; + + +/** + * Gets info from a video without getting additional formats. + * + * @param {string} id + * @param {Object} options + * @returns {Promise} +*/ +exports.getBasicInfo = async(id, options) => { + const retryOptions = Object.assign({}, miniget.defaultOptions, options.requestOptions); + options.requestOptions = Object.assign({}, options.requestOptions, {}); + options.requestOptions.headers = Object.assign({}, + { + // eslint-disable-next-line max-len + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Safari/537.36', + }, options.requestOptions.headers); + const validate = info => { + let playErr = utils.playError(info.player_response, ['ERROR'], UnrecoverableError); + let privateErr = privateVideoError(info.player_response); + if (playErr || privateErr) { + throw playErr || privateErr; + } + return info && info.player_response && ( + info.player_response.streamingData || isRental(info.player_response) || isNotYetBroadcasted(info.player_response) + ); + }; + let info = await pipeline([id, options], validate, retryOptions, [ + getWatchHTMLPage, + getWatchJSONPage, + getVideoInfoPage, + ]); + + Object.assign(info, { + formats: parseFormats(info.player_response), + related_videos: extras.getRelatedVideos(info), + }); + + // Add additional properties to info. + const media = extras.getMedia(info); + const additional = { + author: extras.getAuthor(info), + media, + likes: extras.getLikes(info), + dislikes: extras.getDislikes(info), + age_restricted: !!(media && media.notice_url && AGE_RESTRICTED_URLS.some(url => media.notice_url.includes(url))), + + // Give the standard link to the video. + video_url: BASE_URL + id, + storyboards: extras.getStoryboards(info), + chapters: extras.getChapters(info), + }; + + info.videoDetails = extras.cleanVideoDetails(Object.assign({}, + info.player_response && info.player_response.microformat && + info.player_response.microformat.playerMicroformatRenderer, + info.player_response && info.player_response.videoDetails, additional), info); + + return info; +}; + +const privateVideoError = player_response => { + let playability = player_response && player_response.playabilityStatus; + if (playability && playability.status === 'LOGIN_REQUIRED' && playability.messages && + playability.messages.filter(m => /This is a private video/.test(m)).length) { + return new UnrecoverableError(playability.reason || (playability.messages && playability.messages[0])); + } else { + return null; + } +}; + + +const isRental = player_response => { + let playability = player_response.playabilityStatus; + return playability && playability.status === 'UNPLAYABLE' && + playability.errorScreen && playability.errorScreen.playerLegacyDesktopYpcOfferRenderer; +}; + + +const isNotYetBroadcasted = player_response => { + let playability = player_response.playabilityStatus; + return playability && playability.status === 'LIVE_STREAM_OFFLINE'; +}; + + +const getWatchHTMLURL = (id, options) => `${BASE_URL + id}&hl=${options.lang || 'en'}`; +const getWatchHTMLPageBody = (id, options) => { + const url = getWatchHTMLURL(id, options); + return exports.watchPageCache.getOrSet(url, () => utils.exposedMiniget(url, options).text()); +}; + + +const EMBED_URL = 'https://www.youtube.com/embed/'; +const getEmbedPageBody = (id, options) => { + const embedUrl = `${EMBED_URL + id}?hl=${options.lang || 'en'}`; + return utils.exposedMiniget(embedUrl, options).text(); +}; + + +const getHTML5player = body => { + let html5playerRes = + /|"jsUrl":"([^"]+)"/ + .exec(body); + return html5playerRes ? html5playerRes[1] || html5playerRes[2] : null; +}; + + +const getIdentityToken = (id, options, key, throwIfNotFound) => + exports.cookieCache.getOrSet(key, async() => { + let page = await getWatchHTMLPageBody(id, options); + let match = page.match(/(["'])ID_TOKEN\1[:,]\s?"([^"]+)"/); + if (!match && throwIfNotFound) { + throw new UnrecoverableError('Cookie header used in request, but unable to find YouTube identity token'); + } + return match && match[2]; + }); + + +/** + * Goes through each endpoint in the pipeline, retrying on failure if the error is recoverable. + * If unable to succeed with one endpoint, moves onto the next one. + * + * @param {Array.} args + * @param {Function} validate + * @param {Object} retryOptions + * @param {Array.} endpoints + * @returns {[Object, Object, Object]} + */ +const pipeline = async(args, validate, retryOptions, endpoints) => { + let info; + for (let func of endpoints) { + try { + const newInfo = await retryFunc(func, args.concat([info]), retryOptions); + if (newInfo.player_response) { + newInfo.player_response.videoDetails = assign( + info && info.player_response && info.player_response.videoDetails, + newInfo.player_response.videoDetails); + newInfo.player_response = assign(info && info.player_response, newInfo.player_response); + } + info = assign(info, newInfo); + if (validate(info, false)) { + break; + } + } catch (err) { + if (err instanceof UnrecoverableError || func === endpoints[endpoints.length - 1]) { + throw err; + } + // Unable to find video metadata... so try next endpoint. + } + } + return info; +}; + + +/** + * Like Object.assign(), but ignores `null` and `undefined` from `source`. + * + * @param {Object} target + * @param {Object} source + * @returns {Object} + */ +const assign = (target, source) => { + if (!target || !source) { return target || source; } + for (let [key, value] of Object.entries(source)) { + if (value !== null && value !== undefined) { + target[key] = value; + } + } + return target; +}; + + +/** + * Given a function, calls it with `args` until it's successful, + * or until it encounters an unrecoverable error. + * Currently, any error from miniget is considered unrecoverable. Errors such as + * too many redirects, invalid URL, status code 404, status code 502. + * + * @param {Function} func + * @param {Array.} args + * @param {Object} options + * @param {number} options.maxRetries + * @param {Object} options.backoff + * @param {number} options.backoff.inc + */ +const retryFunc = async(func, args, options) => { + let currentTry = 0, result; + while (currentTry <= options.maxRetries) { + try { + result = await func(...args); + break; + } catch (err) { + if (err instanceof UnrecoverableError || + (err instanceof miniget.MinigetError && err.statusCode < 500) || currentTry >= options.maxRetries) { + throw err; + } + let wait = Math.min(++currentTry * options.backoff.inc, options.backoff.max); + await new Promise(resolve => setTimeout(resolve, wait)); + } + } + return result; +}; + + +const jsonClosingChars = /^[)\]}'\s]+/; +const parseJSON = (source, varName, json) => { + if (!json || typeof json === 'object') { + return json; + } else { + try { + json = json.replace(jsonClosingChars, ''); + return JSON.parse(json); + } catch (err) { + throw Error(`Error parsing ${varName} in ${source}: ${err.message}`); + } + } +}; + + +const findJSON = (source, varName, body, left, right, prependJSON) => { + let jsonStr = utils.between(body, left, right); + if (!jsonStr) { + throw Error(`Could not find ${varName} in ${source}`); + } + return parseJSON(source, varName, utils.cutAfterJSON(`${prependJSON}${jsonStr}`)); +}; + + +const findPlayerResponse = (source, info) => { + const player_response = info && ( + (info.args && info.args.player_response) || + info.player_response || info.playerResponse || info.embedded_player_response); + return parseJSON(source, 'player_response', player_response); +}; + + +const getWatchJSONURL = (id, options) => `${getWatchHTMLURL(id, options)}&pbj=1`; +const getWatchJSONPage = async(id, options) => { + const reqOptions = Object.assign({ headers: {} }, options.requestOptions); + let cookie = reqOptions.headers.Cookie || reqOptions.headers.cookie; + reqOptions.headers = Object.assign({ + 'x-youtube-client-name': '1', + 'x-youtube-client-version': '2.20201203.06.00', + 'x-youtube-identity-token': exports.cookieCache.get(cookie || 'browser') || '', + }, reqOptions.headers); + + const setIdentityToken = async(key, throwIfNotFound) => { + if (reqOptions.headers['x-youtube-identity-token']) { return; } + reqOptions.headers['x-youtube-identity-token'] = await getIdentityToken(id, options, key, throwIfNotFound); + }; + + if (cookie) { + await setIdentityToken(cookie, true); + } + + const jsonUrl = getWatchJSONURL(id, options); + const body = await utils.exposedMiniget(jsonUrl, options, reqOptions).text(); + let parsedBody = parseJSON('watch.json', 'body', body); + if (parsedBody.reload === 'now') { + await setIdentityToken('browser', false); + } + if (parsedBody.reload === 'now' || !Array.isArray(parsedBody)) { + throw Error('Unable to retrieve video metadata in watch.json'); + } + let info = parsedBody.reduce((part, curr) => Object.assign(curr, part), {}); + info.player_response = findPlayerResponse('watch.json', info); + info.html5player = info.player && info.player.assets && info.player.assets.js; + + return info; +}; + + +const getWatchHTMLPage = async(id, options) => { + let body = await getWatchHTMLPageBody(id, options); + let info = { page: 'watch' }; + try { + info.player_response = findJSON('watch.html', 'player_response', + body, /\bytInitialPlayerResponse\s*=\s*\{/i, '\n', '{'); + } catch (err) { + let args = findJSON('watch.html', 'player_response', body, /\bytplayer\.config\s*=\s*{/, '', '{'); + info.player_response = findPlayerResponse('watch.html', args); + } + info.response = findJSON('watch.html', 'response', body, /\bytInitialData("\])?\s*=\s*\{/i, '\n', '{'); + info.html5player = getHTML5player(body); + return info; +}; + + +const INFO_HOST = 'www.youtube.com'; +const INFO_PATH = '/get_video_info'; +const VIDEO_EURL = 'https://youtube.googleapis.com/v/'; +const getVideoInfoPage = async(id, options) => { + const url = new URL(`https://${INFO_HOST}${INFO_PATH}`); + url.searchParams.set('video_id', id); + url.searchParams.set('c', 'TVHTML5'); + url.searchParams.set('cver', '7.20190319'); + url.searchParams.set('eurl', VIDEO_EURL + id); + url.searchParams.set('ps', 'default'); + url.searchParams.set('gl', 'US'); + url.searchParams.set('hl', options.lang || 'en'); + url.searchParams.set('html5', '1'); + const body = await utils.exposedMiniget(url.toString(), options).text(); + let info = querystring.parse(body); + info.player_response = findPlayerResponse('get_video_info', info); + return info; +}; + + +/** + * @param {Object} player_response + * @returns {Array.} + */ +const parseFormats = player_response => { + let formats = []; + if (player_response && player_response.streamingData) { + formats = formats + .concat(player_response.streamingData.formats || []) + .concat(player_response.streamingData.adaptiveFormats || []); + } + return formats; +}; + + +/** + * Gets info from a video additional formats and deciphered URLs. + * + * @param {string} id + * @param {Object} options + * @returns {Promise} + */ +exports.getInfo = async(id, options) => { + let info = await exports.getBasicInfo(id, options); + const hasManifest = + info.player_response && info.player_response.streamingData && ( + info.player_response.streamingData.dashManifestUrl || + info.player_response.streamingData.hlsManifestUrl + ); + let funcs = []; + if (info.formats.length) { + info.html5player = info.html5player || + getHTML5player(await getWatchHTMLPageBody(id, options)) || getHTML5player(await getEmbedPageBody(id, options)); + if (!info.html5player) { + throw Error('Unable to find html5player file'); + } + const html5player = new URL(info.html5player, BASE_URL).toString(); + funcs.push(sig.decipherFormats(info.formats, html5player, options)); + } + if (hasManifest && info.player_response.streamingData.dashManifestUrl) { + let url = info.player_response.streamingData.dashManifestUrl; + funcs.push(getDashManifest(url, options)); + } + if (hasManifest && info.player_response.streamingData.hlsManifestUrl) { + let url = info.player_response.streamingData.hlsManifestUrl; + funcs.push(getM3U8(url, options)); + } + + let results = await Promise.all(funcs); + info.formats = Object.values(Object.assign({}, ...results)); + info.formats = info.formats.map(formatUtils.addFormatMeta); + info.formats.sort(formatUtils.sortFormats); + info.full = true; + return info; +}; + + +/** + * Gets additional DASH formats. + * + * @param {string} url + * @param {Object} options + * @returns {Promise>} + */ +const getDashManifest = (url, options) => new Promise((resolve, reject) => { + let formats = {}; + const parser = sax.parser(false); + parser.onerror = reject; + let adaptationSet; + parser.onopentag = node => { + if (node.name === 'ADAPTATIONSET') { + adaptationSet = node.attributes; + } else if (node.name === 'REPRESENTATION') { + const itag = parseInt(node.attributes.ID); + if (!isNaN(itag)) { + formats[url] = Object.assign({ + itag, url, + bitrate: parseInt(node.attributes.BANDWIDTH), + mimeType: `${adaptationSet.MIMETYPE}; codecs="${node.attributes.CODECS}"`, + }, node.attributes.HEIGHT ? { + width: parseInt(node.attributes.WIDTH), + height: parseInt(node.attributes.HEIGHT), + fps: parseInt(node.attributes.FRAMERATE), + } : { + audioSampleRate: node.attributes.AUDIOSAMPLINGRATE, + }); + } + } + }; + parser.onend = () => { resolve(formats); }; + const req = utils.exposedMiniget(new URL(url, BASE_URL).toString(), options); + req.setEncoding('utf8'); + req.on('error', reject); + req.on('data', chunk => { parser.write(chunk); }); + req.on('end', parser.close.bind(parser)); +}); + + +/** + * Gets additional formats. + * + * @param {string} url + * @param {Object} options + * @returns {Promise>} + */ +const getM3U8 = async(url, options) => { + url = new URL(url, BASE_URL); + const body = await utils.exposedMiniget(url.toString(), options).text(); + let formats = {}; + body + .split('\n') + .filter(line => /^https?:\/\//.test(line)) + .forEach(line => { + const itag = parseInt(line.match(/\/itag\/(\d+)\//)[1]); + formats[line] = { itag, url: line }; + }); + return formats; +}; + + +// Cache get info functions. +// In case a user wants to get a video's info before downloading. +for (let funcName of ['getBasicInfo', 'getInfo']) { + /** + * @param {string} link + * @param {Object} options + * @returns {Promise} + */ + const func = exports[funcName]; + exports[funcName] = async(link, options = {}) => { + utils.checkForUpdates(); + let id = await urlUtils.getVideoID(link); + const key = [funcName, id, options.lang].join('-'); + return exports.cache.getOrSet(key, () => func(id, options)); + }; +} + + +// Export a few helpers. +exports.validateID = urlUtils.validateID; +exports.validateURL = urlUtils.validateURL; +exports.getURLVideoID = urlUtils.getURLVideoID; +exports.getVideoID = urlUtils.getVideoID;