Browse Source

webshot: proper page loading with 404 detection, fix test

Mike L 4 years ago
parent
commit
9bd100d7f0
7 changed files with 48 additions and 21 deletions
  1. 1 1
      README.md
  2. 1 1
      dist/main.js
  3. 20 8
      dist/webshot.js
  4. 1 0
      dist/webshot_test.js
  5. 1 1
      src/main.ts
  6. 22 9
      src/webshot.ts
  7. 2 1
      src/webshot_test.js

+ 1 - 1
README.md

@@ -28,7 +28,7 @@
 | mode | 工作模式,0 为图文模式,1 为纯文本模式,2 为文<br />本附图模式 | 0 |
 | resume_on_start | 是否在启动时从退出时的进度继续(拉取本应用非活<br />动时期错过的推文) | false |
 | work_interval | 对单个订阅两次拉取更新的最少间隔时间(秒) | 60 |
-| webshot_delay | 抓取网页截图时等待网页加载的延迟时长(毫秒) | 5000 |
+| webshot_delay | 抓取网页截图时等待网页加载的延迟时长(毫秒) | 10000 |
 | lockfile | 本地保存订阅信息以便下次启动时恢复 | subscriber.lock |
 | loglevel | 日志调试等级 | info |
 

+ 1 - 1
dist/main.js

@@ -71,7 +71,7 @@ if (config.work_interval === undefined) {
     config.work_interval = 60;
 }
 if (config.webshot_delay === undefined) {
-    config.webshot_delay = 5000;
+    config.webshot_delay = 10000;
 }
 if (config.loglevel === undefined) {
     config.loglevel = 'info';

+ 20 - 8
dist/webshot.js

@@ -61,7 +61,7 @@ class Webshot extends CallableInstance {
                 logger.info(`shooting ${width}*${height} webshot for ${url}`);
                 this.browser.newPage()
                     .then(page => {
-                    page.setUserAgent('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36')
+                    const article = page.setUserAgent('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36')
                         .then(() => page.setViewport({
                         width: width / zoomFactor,
                         height: height / zoomFactor,
@@ -69,15 +69,18 @@ class Webshot extends CallableInstance {
                         deviceScaleFactor: zoomFactor,
                     }))
                         .then(() => page.setBypassCSP(true))
-                        .then(() => page.goto(url, { waitUntil: 'load', timeout: 150000 }))
+                        .then(() => page.goto(url, { waitUntil: 'networkidle0', timeout: webshotDelay }))
+                        .catch(() => {
+                        logger.warn(`navigation timed out at ${webshotDelay} seconds`);
+                    })
                         // hide header, "more options" button, like and retweet count
                         .then(() => page.addStyleTag({
                         content: 'header{display:none!important}path[d=\'M20.207 7.043a1 1 0 0 0-1.414 0L12 13.836 5.207 7.043a1 1 0 0 0-1.414 1.414l7.5 7.5a.996.996 0 0 0 1.414 0l7.5-7.5a1 1 0 0 0 0-1.414z\'],div[role=\'button\']{display: none;}',
                     }))
-                        .then(() => page.waitFor(webshotDelay))
-                        .then(() => page.addScriptTag({
+                        .then(() => page.$('article'));
+                    const captureLoadedPage = () => page.addScriptTag({
                         content: 'document.documentElement.scrollTop=0;',
-                    }))
+                    })
                         .then(() => page.screenshot())
                         .then(screenshot => {
                         new pngjs_1.PNG({
@@ -159,6 +162,15 @@ class Webshot extends CallableInstance {
                         }).parse(screenshot);
                     })
                         .then(() => page.close());
+                    article.then(elementHandle => {
+                        if (elementHandle === null) {
+                            logger.error(`error shooting webshot for ${url}, could not load web page of tweet`);
+                            resolve({ base64: '', boundary: 0 });
+                        }
+                        else {
+                            captureLoadedPage();
+                        }
+                    });
                 })
                     .catch(reject);
             });
@@ -248,9 +260,9 @@ class Webshot extends CallableInstance {
                 const url = `https://mobile.twitter.com/${twi.user.screen_name}/status/${twi.id_str}`;
                 promise = promise.then(() => this.renderWebshot(url, 1920, webshotDelay))
                     .then(base64url => {
-                    if (base64url) {
+                    if (base64url)
                         return uploader(mirai_1.Message.Image('', base64url, url), () => mirai_1.Message.Plain(author + text));
-                    }
+                    return mirai_1.Message.Plain(author + text);
                 })
                     .then(msg => {
                     if (msg)
@@ -267,7 +279,7 @@ class Webshot extends CallableInstance {
                         }
                         else {
                             url = media.video_info.variants
-                                .filter(variant => variant.bitrate)
+                                .filter(variant => variant.bitrate !== undefined)
                                 .sort((var1, var2) => var2.bitrate - var1.bitrate)
                                 .map(variant => variant.url)[0]; // largest video
                         }

File diff suppressed because it is too large
+ 1 - 0
dist/webshot_test.js


+ 1 - 1
src/main.ts

@@ -78,7 +78,7 @@ if (config.work_interval === undefined) {
   config.work_interval = 60;
 }
 if (config.webshot_delay === undefined) {
-  config.webshot_delay = 5000;
+  config.webshot_delay = 10000;
 }
 if (config.loglevel === undefined) {
   config.loglevel = 'info';

+ 22 - 9
src/webshot.ts

@@ -73,7 +73,7 @@ extends CallableInstance<
       logger.info(`shooting ${width}*${height} webshot for ${url}`);
       this.browser.newPage()
         .then(page => {
-          page.setUserAgent('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36')
+          const article = page.setUserAgent('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36')
             .then(() => page.setViewport({
               width: width / zoomFactor,
               height: height / zoomFactor,
@@ -81,15 +81,20 @@ extends CallableInstance<
               deviceScaleFactor: zoomFactor,
             }))
             .then(() => page.setBypassCSP(true))
-            .then(() => page.goto(url, {waitUntil: 'load', timeout: 150000}))
+            .then(() => page.goto(url, {waitUntil: 'networkidle0', timeout: webshotDelay}))
+            .catch(() => {
+              logger.warn(`navigation timed out at ${webshotDelay} seconds`);
+            })
             // hide header, "more options" button, like and retweet count
             .then(() => page.addStyleTag({
               content: 'header{display:none!important}path[d=\'M20.207 7.043a1 1 0 0 0-1.414 0L12 13.836 5.207 7.043a1 1 0 0 0-1.414 1.414l7.5 7.5a.996.996 0 0 0 1.414 0l7.5-7.5a1 1 0 0 0 0-1.414z\'],div[role=\'button\']{display: none;}',
             }))
-            .then(() => page.waitFor(webshotDelay))
-            .then(() => page.addScriptTag({
+            .then(() => page.$('article'));
+
+          const captureLoadedPage = () =>
+            page.addScriptTag({
               content: 'document.documentElement.scrollTop=0;',
-            }))
+            })
             .then(() => page.screenshot())
             .then(screenshot => {
               new PNG({
@@ -171,6 +176,15 @@ extends CallableInstance<
               }).parse(screenshot);
             })
             .then(() => page.close());
+
+          article.then(elementHandle => {
+            if (elementHandle === null) {
+              logger.error(`error shooting webshot for ${url}, could not load web page of tweet`);
+              resolve({base64: '', boundary: 0});
+            } else {
+              captureLoadedPage();
+            }
+          });
         })
         .catch(reject);
     });
@@ -267,9 +281,8 @@ extends CallableInstance<
         const url = `https://mobile.twitter.com/${twi.user.screen_name}/status/${twi.id_str}`;
         promise = promise.then(() => this.renderWebshot(url, 1920, webshotDelay))
           .then(base64url => {
-            if (base64url) {
-              return uploader(Message.Image('', base64url, url), () => Message.Plain(author + text));
-            }
+            if (base64url) return uploader(Message.Image('', base64url, url), () => Message.Plain(author + text));
+            return Message.Plain(author + text);
           })
           .then(msg => {
             if (msg) messageChain.push(msg);
@@ -284,7 +297,7 @@ extends CallableInstance<
               url = media.media_url_https + ':orig';
             } else {
               url = media.video_info.variants
-                .filter(variant => variant.bitrate)
+                .filter(variant => variant.bitrate !== undefined)
                 .sort((var1, var2) => var2.bitrate - var1.bitrate)
                 .map(variant => variant.url)[0]; // largest video
             }

File diff suppressed because it is too large
+ 2 - 1
src/webshot_test.js


Some files were not shown because too many files changed in this diff