Browse Source

more timeout fix, better video scraping logic

Mike L 3 năm trước
mục cha
commit
7aa853e08b
4 tập tin đã thay đổi với 80 bổ sung và 81 xóa
  1. +3 -2
      dist/utils.js
  2. +32 -30
      dist/webshot.js
  3. +3 -2
      src/utils.ts
  4. +42 -47
      src/webshot.ts

+ 3 - 2
dist/utils.js

@@ -60,13 +60,14 @@ const parseStreamlist = (str) => {
 };
 const parsePlaylist = (str) => {
     const segmentRegex = /#EXTINF:(.*),\n(.*)/g;
-    const segmentPaths = [(/#EXT-X-MAP:URI="(.*)"/.exec(str) || [])[1]];
+    const segmentPaths = (/#EXT-X-MAP:URI="(.*)"/.exec(str) || []).slice(1);
     let match;
     let duration = 0;
     while (match = segmentRegex.exec(str)) {
         duration += Number(match[1]);
         segmentPaths.push(match[2]);
     }
-    return { duration, segmentPaths };
+    const extension = (/.*\.(.*?)$/.exec(segmentPaths[0]) || [])[1];
+    return { duration, segmentPaths, extension };
 };
 exports.M3u8 = { parseStreamlist, parsePlaylist };

+ 32 - 30
dist/webshot.js

@@ -1,6 +1,6 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-const fs_1 = require("fs");
+const fs = require("fs");
 const util_1 = require("util");
 const axios_1 = require("axios");
 const CallableInstance = require("callable-instance");
@@ -94,7 +94,7 @@ class Webshot extends CallableInstance {
                     const getTimeout = () => Math.max(500, webshotDelay - getTimerTime());
                     const gotoUrlAndWaitForTweet = () => page.goto(url, { waitUntil: 'load', timeout: Math.min(10000, getTimeout()) })
                         .then(() => Promise.race([
-                        page.waitForSelector('article', { state: 'attached', timeout: getTimeout() }),
+                        page.waitForSelector('article', { state: 'attached', timeout: Math.min(10000, getTimeout()) }),
                         page.click('#placeholder+#ScriptLoadFailure input[value="Try again"]', { timeout: getTimeout() }),
                         page.waitForSelector('div[role="button"]>div>span>:text-matches("^やりなおす|更新$")', { state: 'attached', timeout: getTimeout() }).then(() => page.reload({ timeout: getTimeout() })),
                     ]))
@@ -161,7 +161,7 @@ class Webshot extends CallableInstance {
                         logger.warn(`${err} (${getTimerTime()} ms)`);
                         return page.evaluate(() => document.documentElement.outerHTML).then(html => {
                             const path = temp.path({ suffix: '.html' });
-                            (0, fs_1.writeFileSync)(path, html);
+                            fs.writeFileSync(path, html);
                             logger.warn(`saved debug html to ${path}`);
                         }).then(() => page.route('**/*', route => route.abort().catch(err => {
                             logger.error(`error aborting all requests for debug screenshot, error: ${err}`);
@@ -233,7 +233,7 @@ class Webshot extends CallableInstance {
             var _a;
             return (ext => {
                 const mediaTempFilePath = temp.path({ suffix: `.${ext}` });
-                (0, fs_1.writeFileSync)(mediaTempFilePath, Buffer.from(data));
+                fs.writeFileSync(mediaTempFilePath, Buffer.from(data));
                 return `file://${mediaTempFilePath}`;
             })(((_a = (/\?format=([a-z]+)&/.exec(url))) !== null && _a !== void 0 ? _a : (/.*\/.*\.([^?]+)/.exec(url)))[1]);
         })).then(path => {
@@ -242,6 +242,7 @@ class Webshot extends CallableInstance {
                 case 'png':
                     return koishi_1.Message.Image(path);
                 case 'mp4':
+                case 'ts':
                     return koishi_1.Message.Video(path);
             }
             logger.warn('unable to find MIME type of fetched media, failing this fetch');
@@ -345,32 +346,33 @@ class Webshot extends CallableInstance {
                     if (type === 'VIDEO')
                         page.evaluate(id_str => {
                             var _a;
-                            return (_a = window['__scrapedVideoUrls']) === null || _a === void 0 ? void 0 : _a.filter(videoUrl => new RegExp(`.*/amplify_video/${id_str}.*\\.m3u8(?:\\?|$)`).exec(videoUrl));
-                        }, entityBase.id_str).then(videoUrls => {
-                            if (videoUrls && videoUrls.length) {
-                                Promise.all(videoUrls.map(streamlistUrl => axiosGet(streamlistUrl, 'text')
-                                    .then(streamlist => utils_1.M3u8.parseStreamlist(streamlist)[0])
-                                    .then(({ bandwidth, playlistPath, resolution }) => {
-                                    const [width, height] = /(.*)x(.*)/.exec(resolution).slice(1).map(Number);
-                                    const playlistUrl = new URL(playlistPath, streamlistUrl);
-                                    const mediaTempFilePath = temp.path({ suffix: `.mp4` });
-                                    return axiosGet(playlistUrl.href, 'text')
-                                        .then(playlist => utils_1.M3u8.parsePlaylist(playlist))
-                                        .then(({ duration, segmentPaths }) => (0, utils_1.chainPromises)(segmentPaths.map(path => () => axiosGet(new URL(path, playlistUrl).href, 'arraybuffer').then(data => {
-                                        (0, fs_1.writeFileSync)(mediaTempFilePath, Buffer.from(data), { flag: 'a' });
-                                    }))).then(() => ({
-                                        duration_millis: duration * 1000,
-                                        aspect_ratio: [width, height],
-                                        variants: [{
-                                                bitrate: bandwidth,
-                                                content_type: 'video/mp4',
-                                                url: `file://${mediaTempFilePath}`,
-                                            }]
-                                    })));
-                                }))).then(videoInfos => videoInfos.reduce((vi1, vi2) => (Object.assign(Object.assign({}, vi1), { variants: vi1.variants.concat(vi2.variants) })))).then(videoInfo => extendEntity(Object.assign(Object.assign({}, entityBase), { type: 'video', video_info: videoInfo }))).catch(error => {
-                                    logger.warn('unable to fetch scraped video, ignoring...');
-                                });
-                            }
+                            return (_a = window['__scrapedVideoUrls']) === null || _a === void 0 ? void 0 : _a.find(videoUrl => new RegExp(`.*/amplify_video/${id_str}/pl/[^/]*\\.m3u8(?:\\?|$)`).exec(videoUrl));
+                        }, entityBase.id_str).then(streamlistUrl => axiosGet(streamlistUrl, 'text')
+                            .then(utils_1.M3u8.parseStreamlist)
+                            .then(playlists => playlists.sort((pl1, pl2) => pl2.bandwidth - pl1.bandwidth)[0])
+                            .then(({ bandwidth, playlistPath, resolution }) => {
+                            const [width, height] = /(.*)x(.*)/.exec(resolution).slice(1).map(Number);
+                            const playlistUrl = new URL(playlistPath, streamlistUrl);
+                            return axiosGet(playlistUrl.href, 'text')
+                                .then(playlist => utils_1.M3u8.parsePlaylist(playlist))
+                                .then(({ duration, segmentPaths, extension: ext }) => {
+                                const mediaTempFilePath = temp.path({ suffix: `.${ext}` });
+                                return (0, utils_1.chainPromises)(segmentPaths.map(path => () => axiosGet(new URL(path, playlistUrl).href, 'arraybuffer').then(data => {
+                                    fs.writeFileSync(mediaTempFilePath, Buffer.from(data), { flag: 'a' });
+                                })))
+                                    .then(() => ({
+                                    duration_millis: duration * 1000,
+                                    aspect_ratio: [width, height],
+                                    variants: [{
+                                            bitrate: bandwidth,
+                                            content_type: { mp4: 'video/mp4', ts: 'video/mp2t' }[ext],
+                                            url: `file://${mediaTempFilePath}`,
+                                        }]
+                                }));
+                            });
+                        })).then(videoInfo => extendEntity(Object.assign(Object.assign({}, entityBase), { type: 'video', video_info: videoInfo }))).catch(error => {
+                            logger.error(`error while fetching scraped video, error: ${error}`);
+                            logger.warn('unable to fetch scraped video, ignoring...');
                         });
                 })))
                     .then(fileurl => {

+ 3 - 2
src/utils.ts

@@ -66,14 +66,15 @@ const parseStreamlist = (str: string) => {
 
 const parsePlaylist = (str: string) => {
   const segmentRegex = /#EXTINF:(.*),\n(.*)/g;
-  const segmentPaths: string[] = [(/#EXT-X-MAP:URI="(.*)"/.exec(str) || [])[1]];
+  const segmentPaths: string[] = (/#EXT-X-MAP:URI="(.*)"/.exec(str) || []).slice(1);
   let match: RegExpExecArray;
   let duration = 0;
   while (match = segmentRegex.exec(str)) {
     duration += Number(match[1]);
     segmentPaths.push(match[2]);
   }
-  return {duration, segmentPaths};
+  const extension = (/.*\.(.*?)$/.exec(segmentPaths[0]) || [])[1];
+  return {duration, segmentPaths, extension};
 };
 
 export const M3u8 = {parseStreamlist, parsePlaylist};

+ 42 - 47
src/webshot.ts

@@ -1,4 +1,4 @@
-import { writeFileSync } from 'fs';
+import * as fs from 'fs';
 import { Readable } from 'stream';
 import { promisify } from 'util';
 
@@ -119,7 +119,7 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
           const gotoUrlAndWaitForTweet = () =>
             page.goto(url, {waitUntil: 'load', timeout: Math.min(10000, getTimeout())})
               .then(() => Promise.race([
-                page.waitForSelector('article', {state: 'attached', timeout: getTimeout()}),
+                page.waitForSelector('article', {state: 'attached', timeout: Math.min(10000, getTimeout())}),
                 page.click('#placeholder+#ScriptLoadFailure input[value="Try again"]', {timeout: getTimeout()}),
                 page.waitForSelector(
                   'div[role="button"]>div>span>:text-matches("^やりなおす|更新$")'
@@ -198,7 +198,7 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
               logger.warn(`${err} (${getTimerTime()} ms)`);
               return page.evaluate(() => document.documentElement.outerHTML).then(html => {
                 const path = temp.path({suffix: '.html'});
-                writeFileSync(path, html);
+                fs.writeFileSync(path, html);
                 logger.warn(`saved debug html to ${path}`);
               }).then(() => page.route('**/*', route => route.abort().catch(err => {
                 logger.error(`error aborting all requests for debug screenshot, error: ${err}`);
@@ -272,7 +272,7 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
     (url.match(/^file:/) ? Promise.resolve(url) : axiosGet(url, 'arraybuffer').then(data =>
       (ext => {
         const mediaTempFilePath = temp.path({suffix: `.${ext}`});
-        writeFileSync(mediaTempFilePath, Buffer.from(data));
+        fs.writeFileSync(mediaTempFilePath, Buffer.from(data));
         return `file://${mediaTempFilePath}`;
       })(((/\?format=([a-z]+)&/.exec(url)) ?? (/.*\/.*\.([^?]+)/.exec(url)))[1])
     )).then(path => {
@@ -281,6 +281,7 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
         case 'png':
           return Message.Image(path);
         case 'mp4':
+        case 'ts':
           return Message.Video(path);
       }
       logger.warn('unable to find MIME type of fetched media, failing this fetch');
@@ -387,51 +388,45 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
               type: 'photo',
             });
             if (type === 'VIDEO') page.evaluate(
-              id_str => (window['__scrapedVideoUrls'] as string[])?.filter(videoUrl =>
-                new RegExp(`.*/amplify_video/${id_str}.*\\.m3u8(?:\\?|$)`).exec(videoUrl)
+              id_str => (window['__scrapedVideoUrls'] as string[])?.find(videoUrl =>
+                new RegExp(`.*/amplify_video/${id_str}/pl/[^/]*\\.m3u8(?:\\?|$)`).exec(videoUrl)
               ),
               entityBase.id_str
-            ).then(videoUrls => {
-              if (videoUrls && videoUrls.length) {
-                Promise.all(videoUrls.map(streamlistUrl =>
-                  axiosGet(streamlistUrl, 'text')
-                    .then(streamlist => M3u8.parseStreamlist(streamlist)[0])
-                    .then(({bandwidth, playlistPath, resolution}) => {
-                      const [width, height] = /(.*)x(.*)/.exec(resolution).slice(1).map(Number);
-                      const playlistUrl = new URL(playlistPath, streamlistUrl);
-                      const mediaTempFilePath = temp.path({suffix: `.mp4`});
-                      return axiosGet(playlistUrl.href, 'text')
-                        .then(playlist => M3u8.parsePlaylist(playlist))
-                        .then(({duration, segmentPaths}) =>
-                          chainPromises(segmentPaths.map(path => () =>
-                            axiosGet(new URL(path, playlistUrl).href, 'arraybuffer').then(data => {
-                              writeFileSync(mediaTempFilePath, Buffer.from(data), {flag: 'a'});
-                            })
-                          )).then(() => ({
-                            duration_millis: duration * 1000,
-                            aspect_ratio: [width, height],
-                            variants: [{
-                              bitrate: bandwidth,
-                              content_type: 'video/mp4',
-                              url: `file://${mediaTempFilePath}`,
-                            }]
-                          }) as MediaEntity['video_info'])
-                        )
-                    })
-                )).then(videoInfos =>
-                  videoInfos.reduce((vi1, vi2) => ({
-                    ...vi1,
-                    variants: vi1.variants.concat(vi2.variants)
-                  }))
-                ).then(videoInfo => extendEntity({
-                  ...entityBase,
-                  type: 'video',
-                  video_info: videoInfo,
-                })).catch(error => {
-                  logger.warn('unable to fetch scraped video, ignoring...');
-                });
-              }
-            });
+            ).then(streamlistUrl =>
+              axiosGet(streamlistUrl, 'text')
+                .then(M3u8.parseStreamlist)
+                .then(playlists => playlists.sort((pl1, pl2) => pl2.bandwidth - pl1.bandwidth)[0])
+                .then(({bandwidth, playlistPath, resolution}) => {
+                  const [width, height] = /(.*)x(.*)/.exec(resolution).slice(1).map(Number);
+                  const playlistUrl = new URL(playlistPath, streamlistUrl);
+                  return axiosGet(playlistUrl.href, 'text')
+                    .then(playlist => M3u8.parsePlaylist(playlist))
+                    .then(({duration, segmentPaths, extension: ext}) => {
+                      const mediaTempFilePath = temp.path({suffix: `.${ext}`});
+                      return chainPromises(segmentPaths.map(path => () =>
+                        axiosGet(new URL(path, playlistUrl).href, 'arraybuffer').then(data => {
+                          fs.writeFileSync(mediaTempFilePath, Buffer.from(data), {flag: 'a'});
+                        })
+                      ))
+                      .then(() => ({
+                        duration_millis: duration * 1000,
+                        aspect_ratio: [width, height],
+                        variants: [{
+                          bitrate: bandwidth,
+                          content_type: {mp4: 'video/mp4', ts: 'video/mp2t'}[ext],
+                          url: `file://${mediaTempFilePath}`,
+                        }]
+                      }) as MediaEntity['video_info'])
+                    });
+                })
+              ).then(videoInfo => extendEntity({
+                ...entityBase,
+                type: 'video',
+                video_info: videoInfo,
+              })).catch(error => {
+                logger.error(`error while fetching scraped video, error: ${error}`);
+                logger.warn('unable to fetch scraped video, ignoring...');
+              });
           })
         ))
           .then(fileurl => {