Sfoglia il codice sorgente

fix webshot for various complex cases

Mike L 3 anni fa
parent
commit
575d2475da
2 file modificati con 39 aggiunte e 21 eliminazioni
  1. 18 9
      dist/webshot.js
  2. 21 12
      src/webshot.ts

+ 18 - 9
dist/webshot.js

@@ -84,7 +84,7 @@ class Webshot extends CallableInstance {
                             .then(() => page.reload({ timeout: getTimeout() })),
                     ]))
                         .then(() => page.addStyleTag({
-                        content: 'header,#layers,section>div>div>div~div{display:none!important}article{background-color:transparent!important}' +
+                        content: 'header,#layers{display:none!important}article{background-color:transparent!important}' +
                             '[data-testid="caret"],[role="group"],[data-testid="tweet"] [class*=" "]+:last-child>*+[class*=" "]~div{display:none}',
                     }))
                         .then(() => page.addStyleTag({
@@ -102,12 +102,15 @@ class Webshot extends CallableInstance {
                         }, 250);
                     }))
                         .then(() => page.waitForSelector('xpath=//section/*/*/div[.//article[not(.//time[not(ancestor::div[@aria-labelledby])])]]', { state: 'attached', timeout: getTimeout() }))
-                        .then(handle => handle.$$('xpath=..//a[contains(@href,"content_you_see")]/../../..//*[@role="button"]')
+                        .then(handle => handle.evaluate(div => div.classList.add('mainTweet'))
+                        .then(() => page.addStyleTag({ content: 'div.mainTweet~div{display:none;}' }))
+                        .then(() => handle))
+                        .then(handle => handle.$$('xpath=(.|preceding-sibling::*)//a[contains(@href,"content_you_see")]/../../..//*[@role="button"]')
                         .then(sensitiveToggles => {
                         const count = sensitiveToggles.length;
                         if (count)
                             logger.info(`found ${count} sensitive ${count === 1 ? 'tweet' : 'tweets'} on page, uncollapsing...`);
-                        return (0, utils_1.chainPromises)(sensitiveToggles.filter(toggle => toggle.isVisible()).map(toggle => () => toggle.click()));
+                        return (0, utils_1.chainPromises)(sensitiveToggles.map(toggle => () => toggle.click()));
                     })
                         .then(() => handle))
                         .then(handle => handle.$('[data-testid="tweet"]').then(owner => owner ? handle : null))
@@ -119,7 +122,7 @@ class Webshot extends CallableInstance {
                             const path = temp.path({ suffix: '.html' });
                             (0, fs_1.writeFileSync)(path, html);
                             logger.warn(`saved debug html to ${path}`);
-                        }).then(() => page.screenshot()).then(screenshot => {
+                        }).then(() => page.route('**/*', route => route.abort())).then(() => page.screenshot({ fullPage: true })).then(screenshot => {
                             sharpToFile(sharp(screenshot).jpeg({ quality: 90 })).then(fileUri => {
                                 logger.warn(`saved debug screenshot to ${fileUri.substring(7)}`);
                             });
@@ -131,11 +134,17 @@ class Webshot extends CallableInstance {
                         return (0, utils_1.chainPromises)(morePostProcessings.map(func => () => func(page, handle)))
                             .then(() => (0, util_1.promisify)(setTimeout)(getTimeout()))
                             .then(() => page.evaluate(() => document.activeElement.blur()))
-                            .then(() => handle.$eval('article', article => article.clientHeight).then(height => handle.evaluate((div, minHeight) => {
-                            div.parentElement.setAttribute('style', `min-height: ${minHeight}`);
-                            div.setAttribute('style', 'margin: 0 -1px; padding: 0 1px');
-                        }, height)))
-                            .then(() => handle.screenshot());
+                            .then(() => handle.evaluateHandle(div => {
+                            const minHeight = Number(div.style.transform.match(/translateY\((.+)px\)/)[1]) + div.offsetHeight;
+                            const parentDiv = div.parentElement;
+                            parentDiv.setAttribute('style', `min-height: ${minHeight}px; margin: 0 -1px; padding: 0 1px`);
+                            return parentDiv;
+                        }))
+                            .catch(err => {
+                            logger.error(`error while parsing content height, failing this webshot`);
+                            throw err;
+                        })
+                            .then(parentDivHandle => parentDivHandle.screenshot());
                     })
                         .then(screenshot => {
                         new pngjs_1.PNG({

+ 21 - 12
src/webshot.ts

@@ -109,7 +109,7 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
             ]))
             // hide header, "more options" button, like and retweet count
             .then(() => page.addStyleTag({
-              content: 'header,#layers,section>div>div>div~div{display:none!important}article{background-color:transparent!important}' +
+              content: 'header,#layers{display:none!important}article{background-color:transparent!important}' +
                 '[data-testid="caret"],[role="group"],[data-testid="tweet"] [class*=" "]+:last-child>*+[class*=" "]~div{display:none}',
             }))
             .then(() => page.addStyleTag({
@@ -132,12 +132,17 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
               'xpath=//section/*/*/div[.//article[not(.//time[not(ancestor::div[@aria-labelledby])])]]',
               {state: 'attached', timeout: getTimeout()}
             ) as Promise<puppeteer.ElementHandle<HTMLDivElement>>)
+            // hide comments
+            .then(handle => handle.evaluate(div => div.classList.add('mainTweet'))
+              .then(() => page.addStyleTag({content: 'div.mainTweet~div{display:none;}'}))
+              .then(() => handle)
+            )
             // toggle visibility of sensitive tweets
-            .then(handle => handle.$$('xpath=..//a[contains(@href,"content_you_see")]/../../..//*[@role="button"]')
+            .then(handle => handle.$$('xpath=(.|preceding-sibling::*)//a[contains(@href,"content_you_see")]/../../..//*[@role="button"]')
               .then(sensitiveToggles => {
                 const count = sensitiveToggles.length;
                 if (count) logger.info(`found ${count} sensitive ${count === 1 ? 'tweet' : 'tweets'} on page, uncollapsing...`);
-                return chainPromises(sensitiveToggles.filter(toggle => toggle.isVisible()).map(toggle => () => toggle.click()));
+                return chainPromises(sensitiveToggles.map(toggle => () => toggle.click()));
               })
               .then(() => handle)
             )
@@ -151,7 +156,8 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
                 const path = temp.path({suffix: '.html'});
                 writeFileSync(path, html);
                 logger.warn(`saved debug html to ${path}`);
-              }).then(() => page.screenshot()).then(screenshot => {
+              }).then(() => page.route('**/*', route => route.abort())
+              ).then(() => page.screenshot({fullPage: true})).then(screenshot => {
                 sharpToFile(sharp(screenshot).jpeg({ quality: 90 })).then(fileUri => {
                   logger.warn(`saved debug screenshot to ${fileUri.substring(7)}`);
                 });
@@ -164,14 +170,17 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
                 // hide highlight of retweet header
                 .then(() => page.evaluate(() => (document.activeElement as unknown as HTMLOrSVGElement).blur()))
                 // determine screenshot height
-                .then(() =>
-                  handle.$eval('article', article => article.clientHeight).then(height =>
-                    handle.evaluate((div, minHeight) => {
-                      div.parentElement.setAttribute('style', `min-height: ${minHeight}`);
-                      div.setAttribute('style', 'margin: 0 -1px; padding: 0 1px');
-                    }, height))
-                )
-                .then(() => handle.screenshot());
+                .then(() => handle.evaluateHandle(div => {
+                  const minHeight = Number(div.style.transform.match(/translateY\((.+)px\)/)[1]) + div.offsetHeight;
+                  const parentDiv = div.parentElement;
+                  parentDiv.setAttribute('style', `min-height: ${minHeight}px; margin: 0 -1px; padding: 0 1px`);
+                  return parentDiv as HTMLDivElement;
+                }))
+                .catch(err => {
+                  logger.error(`error while parsing content height, failing this webshot`);
+                  throw err;
+                })
+                .then(parentDivHandle => parentDivHandle.screenshot());
             })
             .then(screenshot => {
               new PNG({