Bläddra i källkod

Merge branch 'koishi-redis-waiting' into mediaonly-koishi-redis-waiting

Mike L 3 år sedan
förälder
incheckning
1f89253d8f
4 ändrade filer med 332 tillägg och 136 borttagningar
  1. 22 1
      dist/utils.js
  2. 130 64
      dist/webshot.js
  3. 24 0
      src/utils.ts
  4. 156 71
      src/webshot.ts

+ 22 - 1
dist/utils.js

@@ -1,6 +1,6 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.BigNumOps = exports.chainPromises = void 0;
+exports.M3u8 = exports.BigNumOps = exports.chainPromises = void 0;
 const chainPromises = (lazyPromises, reducer = (lp1, lp2) => (p) => lp1(p).then(lp2), initialValue) => lazyPromises.reduce(reducer, p => Promise.resolve(p))(initialValue);
 exports.chainPromises = chainPromises;
 const splitBigNumAt = (num, at) => num.replace(RegExp(String.raw `^([+-]?)(\d+)(\d{${at}})$`), '$1$2,$1$3')
@@ -49,3 +49,24 @@ exports.BigNumOps = {
     lShift: bigNumLShift,
     parse: parseBigNum,
 };
+const parseStreamlist = (str) => { // Collects one { bandwidth, resolution, playlistPath } record per matching #EXT-X-STREAM-INF entry in str.
+    const variants = [];
+    const regex = /#EXT-X-STREAM-INF:.*BANDWIDTH=(.*),RESOLUTION=(.*),.*\n(.*)/g;
+    let match; // NOTE(review): the pattern needs ",RESOLUTION=" after BANDWIDTH on the tag line plus a further comma after it; other tags are skipped
+    while (match = regex.exec(str)) { // the g flag makes each exec() call resume after the previous match
+        variants.push({ bandwidth: Number(match[1]), resolution: match[2], playlistPath: match[3] }); // match[3] is the whole line following the tag
+    }
+    return variants; // empty array when nothing matched
+};
+const parsePlaylist = (str) => { // Sums the #EXTINF values in str and lists the #EXT-X-MAP URI followed by every segment path.
+    const segmentRegex = /#EXTINF:(.*),\n(.*)/g;
+    const segmentPaths = [(/#EXT-X-MAP:URI="(.*)"/.exec(str) || [])[1]];
+    let match;
+    let duration = 0; // running total of the numeric value captured from each #EXTINF tag
+    while (match = segmentRegex.exec(str)) {
+        duration += Number(match[1]);
+        segmentPaths.push(match[2]); // the line immediately after the #EXTINF tag
+    }
+    return { duration, segmentPaths }; // NOTE(review): segmentPaths[0] is undefined when str has no #EXT-X-MAP tag
+};
+exports.M3u8 = { parseStreamlist, parsePlaylist };

+ 130 - 64
dist/webshot.js

@@ -26,6 +26,27 @@ const typeInZH = {
     animated_gif: ZHType('GIF'),
 };
 const logger = (0, loggers_1.getLogger)('webshot');
+const axiosGet = (url, responseType, timeout = 150000) => { // GETs url through axios and resolves with res.data; timeout is presumably in milliseconds - TODO confirm against axios
+    logger.info(`fetching ${url}`);
+    return (0, axios_1.default)({
+        method: 'get',
+        url,
+        responseType,
+        timeout,
+    }).then(res => {
+        if (res.status === 200) {
+            logger.info(`successfully fetched ${url}`);
+            return res.data;
+        }
+        else {
+            logger.error(`failed to fetch ${url}: ${res.status}`);
+            throw new Error(); // NOTE(review): caught by the .catch below, so this failure gets logged a second time
+        }
+    }).catch(err => {
+        logger.error(`failed to fetch ${url}: ${err instanceof Error ? err.message : err}`);
+        throw new Error(); // rejects with a fresh message-less Error; the original error is only logged
+    });
+};
 class Webshot extends CallableInstance {
     constructor(wsUrl, mode, onready) {
         super('webshot');
@@ -71,10 +92,16 @@ class Webshot extends CallableInstance {
                     const startTime = new Date().getTime();
                     const getTimerTime = () => new Date().getTime() - startTime;
                     const getTimeout = () => Math.max(500, webshotDelay - getTimerTime());
-                    const goto = () => page.goto(url, { waitUntil: 'load', timeout: Math.min(10000, getTimeout()) }).catch(err => {
+                    const gotoUrlAndWaitForTweet = () => page.goto(url, { waitUntil: 'load', timeout: Math.min(10000, getTimeout()) })
+                        .then(() => Promise.race([
+                        page.waitForSelector('article', { state: 'attached', timeout: getTimeout() }),
+                        page.click('#placeholder+#ScriptLoadFailure input[value="Try again"]', { timeout: getTimeout() }),
+                        page.waitForSelector('div[role="button"]>div>span>:text-matches("^やりなおす|更新$")', { state: 'attached', timeout: getTimeout() }).then(() => page.reload({ timeout: getTimeout() })),
+                    ]))
+                        .catch(err => {
                         if (err.name === 'TimeoutError' && webshotDelay > getTimerTime()) {
                             logger.warn(`navigation timed out after ${getTimerTime()} ms, retrying...`);
-                            return goto();
+                            return gotoUrlAndWaitForTweet();
                         }
                         throw err;
                     });
@@ -82,14 +109,21 @@ class Webshot extends CallableInstance {
                         width: width / zoomFactor,
                         height: height / zoomFactor,
                     })
-                        .then(() => page.route('*:\/\/video.twimg.com\/**', route => route.abort()))
-                        .then(goto)
-                        .then(() => Promise.race([
-                        page.waitForSelector('article', { state: 'attached', timeout: getTimeout() }),
-                        page.click('#placeholder+#ScriptLoadFailure input[value="Try again"]', { timeout: getTimeout() }),
-                        page.waitForSelector('div[role="button"]>div>span>:text-matches("^やりなおす|更新$")', { state: 'attached', timeout: getTimeout() })
-                            .then(() => page.reload({ timeout: getTimeout() })),
-                    ]))
+                        .then(() => page.route('*://video.twimg.com/**', route => route.abort().then(() => page.evaluate(videoUrl => {
+                        let videoUrls = window['__scrapedVideoUrls'];
+                        if (!videoUrls)
+                            videoUrls = window['__scrapedVideoUrls'] = [];
+                        if (!videoUrls.includes(videoUrl)) {
+                            videoUrls.push(videoUrl);
+                            return videoUrl;
+                        }
+                    }, route.request().url())).then(videoUrl => {
+                        if (videoUrl)
+                            logger.info(`scraped ${route.request().url()} from page`);
+                    }).catch(err => {
+                        logger.error(`error aborting request to ${route.request().url()}, error: ${err}`);
+                    })))
+                        .then(gotoUrlAndWaitForTweet)
                         .then(() => page.addStyleTag({
                         content: 'header,#layers{display:none!important}article{background-color:transparent!important}' +
                             '[data-testid="caret"],[role="group"],[data-testid="tweet"] [class*=" "]+:last-child>*+[class*=" "]~div{display:none}',
@@ -129,7 +163,9 @@ class Webshot extends CallableInstance {
                             const path = temp.path({ suffix: '.html' });
                             (0, fs_1.writeFileSync)(path, html);
                             logger.warn(`saved debug html to ${path}`);
-                        }).then(() => page.route('**/*', route => route.abort())).then(() => page.screenshot({ fullPage: true })).then(screenshot => {
+                        }).then(() => page.route('**/*', route => route.abort().catch(err => {
+                            logger.error(`error aborting all requests for debug screenshot, error: ${err}`);
+                        }))).then(() => page.screenshot({ fullPage: true })).then(screenshot => {
                             sharpToFile(sharp(screenshot).jpeg({ quality: 90 })).then(fileUri => {
                                 logger.warn(`saved debug screenshot to ${fileUri.substring(7)}`);
                             });
@@ -138,8 +174,11 @@ class Webshot extends CallableInstance {
                         .then(handle => {
                         if (handle === null)
                             throw new puppeteer.errors.TimeoutError();
+                        let cropTop;
                         return (0, utils_1.chainPromises)(morePostProcessings.map(func => () => func(page, handle)))
                             .then(() => (0, util_1.promisify)(setTimeout)(getTimeout()))
+                            .then(() => page.evaluate(() => document.documentElement.scrollTop))
+                            .then(scrollTop => { cropTop = scrollTop * zoomFactor; })
                             .then(() => page.evaluate(() => document.activeElement.blur()))
                             .then(() => handle.evaluateHandle(div => {
                             const minHeight = Number(div.style.transform.match(/translateY\((.+)px\)/)[1]) + div.offsetHeight;
@@ -151,16 +190,23 @@ class Webshot extends CallableInstance {
                             logger.error(`error while parsing content height, failing this webshot`);
                             throw err;
                         })
-                            .then(parentDivHandle => parentDivHandle.screenshot());
+                            .then(parentDivHandle => parentDivHandle.screenshot())
+                            .then(screenshot => [screenshot, cropTop]);
                     })
-                        .then(screenshot => {
+                        .then(([screenshot, cropTop]) => {
                         new pngjs_1.PNG({
                             filterType: 4,
                             deflateLevel: 0,
                         }).on('parsed', function () {
-                            sharpToFile(jpeg(this.pack())).then(path => {
+                            let png = this;
+                            if (cropTop > 0) {
+                                logger.info(`cropping screenshot at y offset ${cropTop}...`);
+                                png = new pngjs_1.PNG({ width: this.width, height: this.height - cropTop });
+                                this.bitblt(png, 0, cropTop, png.width, png.height, 0, 0);
+                            }
+                            sharpToFile(jpeg(png.pack())).then(path => {
                                 logger.info(`finished webshot for ${url}`);
-                                resolve({ path, boundary: this.height });
+                                resolve({ path, boundary: png.height });
                             });
                         }).parse(screenshot);
                     })
@@ -183,42 +229,23 @@ class Webshot extends CallableInstance {
             }).catch(error => this.reconnect(error)
                 .then(() => this.renderWebshot(url, height, webshotDelay, ...morePostProcessings)));
         };
-        this.fetchMedia = (url) => new Promise((resolve, reject) => {
-            logger.info(`fetching ${url}`);
-            (0, axios_1.default)({
-                method: 'get',
-                url,
-                responseType: 'arraybuffer',
-                timeout: 150000,
-            }).then(res => {
-                if (res.status === 200) {
-                    logger.info(`successfully fetched ${url}`);
-                    resolve(res.data);
-                }
-                else {
-                    logger.error(`failed to fetch ${url}: ${res.status}`);
-                    reject();
-                }
-            }).catch(err => {
-                logger.error(`failed to fetch ${url}: ${err instanceof Error ? err.message : err}`);
-                reject();
-            });
-        }).then(data => {
+        this.fetchMedia = (url) => (url.match(/^file:/) ? Promise.resolve(url) : axiosGet(url, 'arraybuffer').then(data => {
             var _a;
             return (ext => {
                 const mediaTempFilePath = temp.path({ suffix: `.${ext}` });
                 (0, fs_1.writeFileSync)(mediaTempFilePath, Buffer.from(data));
-                const path = `file://${mediaTempFilePath}`;
-                switch (ext) {
-                    case 'jpg':
-                    case 'png':
-                        return koishi_1.Message.Image(path);
-                    case 'mp4':
-                        return koishi_1.Message.Video(path);
-                }
-                logger.warn('unable to find MIME type of fetched media, failing this fetch');
-                throw Error();
+                return `file://${mediaTempFilePath}`;
             })(((_a = (/\?format=([a-z]+)&/.exec(url))) !== null && _a !== void 0 ? _a : (/.*\/.*\.([^?]+)/.exec(url)))[1]);
+        })).then(path => {
+            switch ((/.*\.(.*?)$/.exec(path) || [])[1]) {
+                case 'jpg':
+                case 'png':
+                    return koishi_1.Message.Image(path);
+                case 'mp4':
+                    return koishi_1.Message.Video(path);
+            }
+            logger.warn('unable to find MIME type of fetched media, failing this fetch');
+            throw Error();
         });
         if (this.mode = mode) {
             onready();
@@ -257,12 +284,12 @@ class Webshot extends CallableInstance {
             });
             if (this.mode === 0) {
                 const url = `https://mobile.twitter.com/${twi.user.screen_name}/status/${twi.id_str}`;
-                const extendEntity = (cardImg) => {
-                    var _a, _b;
-                    originTwi.extended_entities = Object.assign(Object.assign({}, originTwi.extended_entities), { media: [
-                            ...(_b = (_a = originTwi.extended_entities) === null || _a === void 0 ? void 0 : _a.media) !== null && _b !== void 0 ? _b : [],
-                            cardImg,
-                        ] });
+                const extendEntity = (cardMedia) => { // Adds cardMedia to originTwi.extended_entities.media unless an entity with the same id_str is already present.
+                    var _a;
+                    return (media => {
+                        if (!media.some(entity => entity.id_str === cardMedia.id_str))
+                            media.push(cardMedia);
+                    })((_a = (originTwi.extended_entities || (originTwi.extended_entities = {}))).media || (_a.media = [])); // creates extended_entities and its media array in place when missing
+                };
                 const truncateLongThread = (atId) => {
                     if (!atId)
@@ -286,27 +313,66 @@ class Webshot extends CallableInstance {
                     }
                     catch (_a) { }
                     document.documentElement.scrollTop = 0;
-                }).then(truncateLongThread), (_, tweetHandle) => tweetHandle.evaluate(div => {
-                    const cardImg = div.querySelector('div[data-testid^="card.layout"][data-testid$=".media"] img');
-                    if (typeof (cardImg === null || cardImg === void 0 ? void 0 : cardImg.getAttribute('src')) === 'string') {
-                        const match = /^(.*\/card_img\/(\d+)\/.+\?format=.*)&name=/.exec(cardImg === null || cardImg === void 0 ? void 0 : cardImg.getAttribute('src'));
-                        if (match) {
-                            const [media_url_https, id_str] = match.slice(1);
-                            return {
+                }).then(truncateLongThread), (page, tweetHandle) => tweetHandle.evaluate(div => {
+                    const cardMedia = div.querySelector('div[data-testid^="card.layout"][data-testid$=".media"] img, video');
+                    let match;
+                    if ((cardMedia === null || cardMedia === void 0 ? void 0 : cardMedia.tagName) === 'IMG' && typeof (cardMedia === null || cardMedia === void 0 ? void 0 : cardMedia.getAttribute('src')) === 'string') {
+                        match = /^(.*\/card_img\/(\d+)\/.+\?format=.*)&name=/.exec(cardMedia === null || cardMedia === void 0 ? void 0 : cardMedia.getAttribute('src'));
+                    }
+                    if ((cardMedia === null || cardMedia === void 0 ? void 0 : cardMedia.tagName) === 'VIDEO' && typeof (cardMedia === null || cardMedia === void 0 ? void 0 : cardMedia.getAttribute('poster')) === 'string') {
+                        match = /^(.*\/amplify_video_thumb\/(\d+)\/img\/.*$)/.exec(cardMedia === null || cardMedia === void 0 ? void 0 : cardMedia.getAttribute('poster'));
+                    }
+                    if (match) {
+                        const [media_url_https, id_str] = match.slice(1);
+                        return {
+                            type: cardMedia.tagName,
+                            entityBase: {
                                 media_url: media_url_https.replace(/^https/, 'http'),
                                 media_url_https,
                                 url: '',
                                 display_url: '',
                                 expanded_url: '',
-                                type: 'photo',
                                 id: Number(id_str),
                                 id_str,
                                 sizes: undefined,
-                            };
-                        }
+                            }
+                        };
                     }
-                }).then(cardImg => { if (cardImg)
-                    extendEntity(cardImg); })))
+                    return {};
+                }).then(({ type, entityBase }) => {
+                    if (type === 'IMG')
+                        extendEntity(Object.assign(Object.assign({}, entityBase), { type: 'photo' }));
+                    if (type === 'VIDEO')
+                        page.evaluate(id_str => {
+                            var _a;
+                            return (_a = window['__scrapedVideoUrls']) === null || _a === void 0 ? void 0 : _a.filter(videoUrl => new RegExp(`.*/amplify_video/${id_str}.*\\.m3u8(?:\\?|$)`).exec(videoUrl));
+                        }, entityBase.id_str).then(videoUrls => {
+                            if (videoUrls && videoUrls.length) {
+                                Promise.all(videoUrls.map(streamlistUrl => axiosGet(streamlistUrl, 'text')
+                                    .then(streamlist => utils_1.M3u8.parseStreamlist(streamlist)[0])
+                                    .then(({ bandwidth, playlistPath, resolution }) => {
+                                    const [width, height] = /(.*)x(.*)/.exec(resolution).slice(1).map(Number);
+                                    const playlistUrl = new URL(playlistPath, streamlistUrl);
+                                    const mediaTempFilePath = temp.path({ suffix: `.mp4` });
+                                    return axiosGet(playlistUrl.href, 'text')
+                                        .then(playlist => utils_1.M3u8.parsePlaylist(playlist))
+                                        .then(({ duration, segmentPaths }) => (0, utils_1.chainPromises)(segmentPaths.map(path => () => axiosGet(new URL(path, playlistUrl).href, 'arraybuffer').then(data => {
+                                        (0, fs_1.writeFileSync)(mediaTempFilePath, Buffer.from(data), { flag: 'a' });
+                                    }))).then(() => ({
+                                        duration_millis: duration * 1000,
+                                        aspect_ratio: [width, height],
+                                        variants: [{
+                                                bitrate: bandwidth,
+                                                content_type: 'video/mp4',
+                                                url: `file://${mediaTempFilePath}`,
+                                            }]
+                                    })));
+                                }))).then(videoInfos => videoInfos.reduce((vi1, vi2) => (Object.assign(Object.assign({}, vi1), { variants: vi1.variants.concat(vi2.variants) })))).then(videoInfo => extendEntity(Object.assign(Object.assign({}, entityBase), { type: 'video', video_info: videoInfo }))).catch(error => {
+                                    logger.warn('unable to fetch scraped video, ignoring...');
+                                });
+                            }
+                        });
+                })))
                     .then(fileurl => {
                     if (fileurl)
                         return koishi_1.Message.Image(fileurl);

+ 24 - 0
src/utils.ts

@@ -53,3 +53,27 @@ export const BigNumOps = {
   lShift: bigNumLShift,
   parse: parseBigNum,
 };
+
+const parseStreamlist = (str: string) => { // Collects one {bandwidth, resolution, playlistPath} record per matching #EXT-X-STREAM-INF entry in str.
+  const variants: {bandwidth: number, resolution: string, playlistPath: string}[] = [];
+  const regex = /#EXT-X-STREAM-INF:.*BANDWIDTH=(.*),RESOLUTION=(.*),.*\n(.*)/g;
+  let match: RegExpExecArray; // NOTE(review): the pattern needs ",RESOLUTION=" after BANDWIDTH on the tag line plus a further comma after it; other tags are skipped
+  while (match = regex.exec(str)) { // the g flag makes each exec() call resume after the previous match
+    variants.push({bandwidth: Number(match[1]), resolution: match[2], playlistPath: match[3]})
+  }
+  return variants; // empty array when nothing matched
+};
+
+const parsePlaylist = (str: string) => { // Sums the #EXTINF values in str and lists the #EXT-X-MAP URI followed by every segment path.
+  const segmentRegex = /#EXTINF:(.*),\n(.*)/g;
+  const segmentPaths: string[] = [(/#EXT-X-MAP:URI="(.*)"/.exec(str) || [])[1]];
+  let match: RegExpExecArray;
+  let duration = 0; // running total of the numeric value captured from each #EXTINF tag
+  while (match = segmentRegex.exec(str)) {
+    duration += Number(match[1]);
+    segmentPaths.push(match[2]); // the line immediately after the #EXTINF tag
+  }
+  return {duration, segmentPaths}; // NOTE(review): segmentPaths[0] is undefined when str has no #EXT-X-MAP tag
+};
+
+export const M3u8 = {parseStreamlist, parsePlaylist};

+ 156 - 71
src/webshot.ts

@@ -2,7 +2,7 @@ import { writeFileSync } from 'fs';
 import { Readable } from 'stream';
 import { promisify } from 'util';
 
-import axios from 'axios';
+import axios, { ResponseType as AxiosResponseType } from 'axios';
 import * as CallableInstance from 'callable-instance';
 import { XmlEntities } from 'html-entities';
 import { PNG } from 'pngjs';
@@ -13,7 +13,7 @@ import * as temp from 'temp';
 import { getLogger } from './loggers';
 import { Message } from './koishi';
 import { MediaEntity, Tweet } from './twitter';
-import { chainPromises } from './utils';
+import { chainPromises, M3u8 } from './utils';
 
 const xmlEntities = new XmlEntities();
 
@@ -30,6 +30,27 @@ const typeInZH = {
 
 const logger = getLogger('webshot');
 
+const axiosGet = <T extends AxiosResponseType>(url: string, responseType: T, timeout = 150000) => { // GETs url through axios and resolves with res.data; timeout is presumably in milliseconds - TODO confirm against axios
+  logger.info(`fetching ${url}`);
+  return axios({
+    method: 'get',
+    url,
+    responseType,
+    timeout,
+  }).then(res => {
+    if (res.status === 200) {
+      logger.info(`successfully fetched ${url}`);
+      return res.data as {text: string, arraybuffer: ArrayBuffer, [k: string]: any}[T]; // result type is picked by responseType: string for text, ArrayBuffer for arraybuffer, any otherwise
+    } else {
+      logger.error(`failed to fetch ${url}: ${res.status}`);
+      throw new Error(); // NOTE(review): caught by the .catch below, so this failure gets logged a second time
+    }
+  }).catch (err => {
+    logger.error(`failed to fetch ${url}: ${err instanceof Error ? err.message : err}`);
+    throw new Error(); // rejects with a fresh message-less Error; the original error is only logged
+  });
+};
+
 class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Promise<void>> {
 
   private browser: puppeteer.Browser;
@@ -95,25 +116,41 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
           const startTime = new Date().getTime();
           const getTimerTime = () => new Date().getTime() - startTime;
           const getTimeout = () => Math.max(500, webshotDelay - getTimerTime());
-          const goto = () => page.goto(url, {waitUntil: 'load', timeout: Math.min(10000, getTimeout())}).catch(err => {
-            if (err.name === 'TimeoutError' && webshotDelay > getTimerTime()) {
-              logger.warn(`navigation timed out after ${getTimerTime()} ms, retrying...`);
-              return goto();
-            }
-            throw err;
-          });
+          const gotoUrlAndWaitForTweet = () => // Navigates to url, then races three waits; re-runs itself on a TimeoutError while time remains.
+            page.goto(url, {waitUntil: 'load', timeout: Math.min(10000, getTimeout())})
+              .then(() => Promise.race([ // settles with whichever of the three promises below settles first
+                page.waitForSelector('article', {state: 'attached', timeout: getTimeout()}),
+                page.click('#placeholder+#ScriptLoadFailure input[value="Try again"]', {timeout: getTimeout()}),
+                page.waitForSelector(
+                  'div[role="button"]>div>span>:text-matches("^やりなおす|更新$")'
+                , {state: 'attached', timeout: getTimeout()}).then(() => page.reload({timeout: getTimeout()})),
+              ]))
+              .catch(err => {
+                if (err.name === 'TimeoutError' && webshotDelay > getTimerTime()) { // retry only while less than webshotDelay ms have elapsed since startTime
+                  logger.warn(`navigation timed out after ${getTimerTime()} ms, retrying...`);
+                  return gotoUrlAndWaitForTweet();
+                }
+                throw err; // any other error, or a timeout past the budget, propagates to the caller
+              });
           page.setViewportSize({
             width: width / zoomFactor,
             height: height / zoomFactor,
           })
-            .then(() => page.route('*:\/\/video.twimg.com\/**', route => route.abort()))
-            .then(goto)
-            .then(() => Promise.race([
-              page.waitForSelector('article', {state: 'attached', timeout: getTimeout()}),
-              page.click('#placeholder+#ScriptLoadFailure input[value="Try again"]', {timeout: getTimeout()}),
-              page.waitForSelector('div[role="button"]>div>span>:text-matches("^やりなおす|更新$")', {state: 'attached', timeout: getTimeout()})
-                .then(() => page.reload({timeout: getTimeout()})),
-            ]))
+            .then(() => page.route('*://video.twimg.com/**', route =>
+              route.abort().then(() => page.evaluate(videoUrl => {
+                let videoUrls: string[] = window['__scrapedVideoUrls'];
+                if (!videoUrls) videoUrls = window['__scrapedVideoUrls'] = [];
+                if (!videoUrls.includes(videoUrl)) {
+                  videoUrls.push(videoUrl);
+                  return videoUrl;
+                }
+              }, route.request().url())).then(videoUrl => {
+                if (videoUrl) logger.info(`scraped ${route.request().url()} from page`);
+              }).catch(err => {
+                logger.error(`error aborting request to ${route.request().url()}, error: ${err}`);
+              })
+            ))
+            .then(gotoUrlAndWaitForTweet)
             // hide header, "more options" button, like and retweet count
             .then(() => page.addStyleTag({
               content: 'header,#layers{display:none!important}article{background-color:transparent!important}' +
@@ -163,7 +200,9 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
                 const path = temp.path({suffix: '.html'});
                 writeFileSync(path, html);
                 logger.warn(`saved debug html to ${path}`);
-              }).then(() => page.route('**/*', route => route.abort())
+              }).then(() => page.route('**/*', route => route.abort().catch(err => {
+                logger.error(`error aborting all requests for debug screenshot, error: ${err}`);
+              }))
               ).then(() => page.screenshot({fullPage: true})).then(screenshot => {
                 sharpToFile(sharp(screenshot).jpeg({ quality: 90 })).then(fileUri => {
                   logger.warn(`saved debug screenshot to ${fileUri.substring(7)}`);
@@ -172,8 +211,12 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
             })
             .then(handle => {
               if (handle === null) throw new puppeteer.errors.TimeoutError();
+              let cropTop: number;
               return chainPromises(morePostProcessings.map(func => () => func(page, handle)))
                 .then(() => promisify(setTimeout)(getTimeout()))
+                // determine screenshot crop y offset
+                .then(() => page.evaluate(() => document.documentElement.scrollTop))
+                .then(scrollTop => { cropTop = scrollTop * zoomFactor; })
                 // hide highlight of retweet header
                 .then(() => page.evaluate(() => (document.activeElement as unknown as HTMLOrSVGElement).blur()))
                 // determine screenshot height
@@ -187,16 +230,23 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
                   logger.error(`error while parsing content height, failing this webshot`);
                   throw err;
                 })
-                .then(parentDivHandle => parentDivHandle.screenshot());
+                .then(parentDivHandle => parentDivHandle.screenshot())
+                .then<[Buffer, number]>(screenshot => [screenshot, cropTop]);
             })
-            .then(screenshot => {
+            .then(([screenshot, cropTop]) => {
               new PNG({
                 filterType: 4,
                 deflateLevel: 0,
               }).on('parsed', function () {
-                sharpToFile(jpeg(this.pack())).then(path => {
+                let png = this;
+                if (cropTop > 0) {
+                  logger.info(`cropping screenshot at y offset ${cropTop}...`);
+                  png = new PNG({width: this.width, height: this.height - cropTop});
+                  this.bitblt(png, 0, cropTop, png.width, png.height, 0, 0);
+                }
+                sharpToFile(jpeg(png.pack())).then(path => {
                   logger.info(`finished webshot for ${url}`);
-                  resolve({path, boundary: this.height});
+                  resolve({path, boundary: png.height});
                 });
               }).parse(screenshot);
             })
@@ -218,31 +268,15 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
     );
   };
 
-  private fetchMedia = (url: string): Promise<string> => new Promise<ArrayBuffer>((resolve, reject) => {
-    logger.info(`fetching ${url}`);
-    axios({
-      method: 'get',
-      url,
-      responseType: 'arraybuffer',
-      timeout: 150000,
-    }).then(res => {
-      if (res.status === 200) {
-        logger.info(`successfully fetched ${url}`);
-        resolve(res.data);
-      } else {
-        logger.error(`failed to fetch ${url}: ${res.status}`);
-        reject();
-      }
-    }).catch (err => {
-      logger.error(`failed to fetch ${url}: ${err instanceof Error ? err.message : err}`);
-      reject();
-    });
-  }).then(data =>
-    (ext => {
-      const mediaTempFilePath = temp.path({suffix: `.${ext}`});
-      writeFileSync(mediaTempFilePath, Buffer.from(data));
-      const path = `file://${mediaTempFilePath}`;
-      switch (ext) {
+  private fetchMedia = (url: string) =>
+    (url.match(/^file:/) ? Promise.resolve(url) : axiosGet(url, 'arraybuffer').then(data =>
+      (ext => {
+        const mediaTempFilePath = temp.path({suffix: `.${ext}`});
+        writeFileSync(mediaTempFilePath, Buffer.from(data));
+        return `file://${mediaTempFilePath}`;
+      })(((/\?format=([a-z]+)&/.exec(url)) ?? (/.*\/.*\.([^?]+)/.exec(url)))[1])
+    )).then(path => {
+      switch ((/.*\.(.*?)$/.exec(path) || [])[1]) {
         case 'jpg':
         case 'png':
           return Message.Image(path);
@@ -251,8 +285,7 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
       }
       logger.warn('unable to find MIME type of fetched media, failing this fetch');
       throw Error();
-    })(((/\?format=([a-z]+)&/.exec(url)) ?? (/.*\/.*\.([^?]+)/.exec(url)))[1])
-  );
+    });
 
   public webshot(
     tweets: Tweet[],
@@ -291,15 +324,10 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
       // invoke webshot
       if (this.mode === 0) {
         const url = `https://mobile.twitter.com/${twi.user.screen_name}/status/${twi.id_str}`;
-        const extendEntity = (cardImg: MediaEntity) => {
-          originTwi.extended_entities = {
-            ...originTwi.extended_entities,
-            media: [
-              ...originTwi.extended_entities?.media ?? [],
-              cardImg,
-            ],
-          };
-        };
+        const extendEntity = (cardMedia: MediaEntity) => // Adds cardMedia to originTwi.extended_entities.media unless an entity with the same id_str is already present.
+          (media => {
+            if (!media.some(entity => entity.id_str === cardMedia.id_str)) media.push(cardMedia);
+          })((originTwi.extended_entities ||= {}).media ||= []); // creates extended_entities and its media array in place when missing
         const truncateLongThread = (atId: string) => {
           if (!atId) return;
           logger.info(`thread too long, truncating at tweet ${atId}...`);
@@ -326,28 +354,85 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
             document.documentElement.scrollTop = 0;
           }).then(truncateLongThread),
 
-          // scrape card image from main tweet
-          (_, tweetHandle: puppeteer.ElementHandle<HTMLDivElement>) => tweetHandle.evaluate(div => {
-            const cardImg = div.querySelector('div[data-testid^="card.layout"][data-testid$=".media"] img');
-            if (typeof cardImg?.getAttribute('src') === 'string') {
-              const match = /^(.*\/card_img\/(\d+)\/.+\?format=.*)&name=/.exec(cardImg?.getAttribute('src'));
-              if (match) {
-                // tslint:disable-next-line: variable-name
-                const [media_url_https, id_str] = match.slice(1);
-                return {
+          // scrape card media (an image, or a video identified via its poster) from main tweet and register it as an extra media entity
+          (page, tweetHandle: puppeteer.ElementHandle<HTMLDivElement>) => tweetHandle.evaluate(div => { // this callback is evaluated in the page, not in Node
+            const cardMedia = div.querySelector('div[data-testid^="card.layout"][data-testid$=".media"] img, video'); // NOTE(review): ", video" is its own selector, so any <video> under div matches, not only one inside the card — confirm
+            let match: RegExpExecArray;
+            if (cardMedia?.tagName === 'IMG' && typeof cardMedia?.getAttribute('src') === 'string') {
+              match = /^(.*\/card_img\/(\d+)\/.+\?format=.*)&name=/.exec(cardMedia?.getAttribute('src')); // groups: [1] URL up to (excluding) "&name=", [2] numeric id
+            }
+            if (cardMedia?.tagName === 'VIDEO' && typeof cardMedia?.getAttribute('poster') === 'string') {
+              match = /^(.*\/amplify_video_thumb\/(\d+)\/img\/.*$)/.exec(cardMedia?.getAttribute('poster')); // groups: [1] whole poster URL, [2] numeric id
+            }
+            if (match) {
+              const [media_url_https, id_str] = match.slice(1);
+              return {
+                type: cardMedia.tagName, // 'IMG' or 'VIDEO' here, since match is only set in those two branches
+                entityBase: {
                   media_url: media_url_https.replace(/^https/, 'http'),
                   media_url_https,
                   url: '',
                   display_url: '',
                   expanded_url: '',
-                  type: 'photo',
                   id: Number(id_str),
                   id_str,
                   sizes: undefined,
-                };
-              }
+                }
+              };
             }
-          }).then(cardImg => { if (cardImg) extendEntity(cardImg); })
+            return {}; // nothing scraped: the destructuring below then yields undefined type / entityBase
+          }).then(({type, entityBase}) => {
+            if (type === 'IMG') extendEntity({
+              ...entityBase,
+              type: 'photo',
+            });
+            if (type === 'VIDEO') page.evaluate( // NOTE(review): this promise is not returned, so the enclosing chain does not wait for the video download — confirm intended
+              id_str => (window['__scrapedVideoUrls'] as string[])?.filter(videoUrl => // presumably filled by code outside this hunk — verify
+                new RegExp(`.*/amplify_video/${id_str}.*\\.m3u8(?:\\?|$)`).exec(videoUrl)
+              ),
+              entityBase.id_str
+            ).then(videoUrls => {
+              if (videoUrls && videoUrls.length) {
+                Promise.all(videoUrls.map(streamlistUrl =>
+                  axiosGet(streamlistUrl, 'text')
+                    .then(streamlist => M3u8.parseStreamlist(streamlist)[0]) // only the first listed variant is used; an empty list makes the next destructuring throw (ends up in the catch below)
+                    .then(({bandwidth, playlistPath, resolution}) => {
+                      const [width, height] = /(.*)x(.*)/.exec(resolution).slice(1).map(Number);
+                      const playlistUrl = new URL(playlistPath, streamlistUrl); // playlist path is resolved relative to the stream list URL
+                      const mediaTempFilePath = temp.path({suffix: `.mp4`});
+                      return axiosGet(playlistUrl.href, 'text')
+                        .then(playlist => M3u8.parsePlaylist(playlist))
+                        .then(({duration, segmentPaths}) =>
+                          chainPromises(segmentPaths.map(path => () => // segments are fetched one after another so they are appended in playlist order
+                            axiosGet(new URL(path, playlistUrl).href, 'arraybuffer').then(data => { // NOTE(review): first path comes from #EXT-X-MAP and may be undefined when the playlist has none — verify
+                              writeFileSync(mediaTempFilePath, Buffer.from(data), {flag: 'a'}); // flag 'a' appends each segment to the same temp file
+                            })
+                          )).then(() => ({
+                            duration_millis: duration * 1000,
+                            aspect_ratio: [width, height],
+                            variants: [{
+                              bitrate: bandwidth,
+                              content_type: 'video/mp4',
+                              url: `file://${mediaTempFilePath}`,
+                            }]
+                          }) as MediaEntity['video_info'])
+                        )
+                    })
+                )).then(videoInfos =>
+                  videoInfos.reduce((vi1, vi2) => ({ // merges all variants into the first video_info; list is non-empty here, so no initial value is needed
+                    ...vi1,
+                    variants: vi1.variants.concat(vi2.variants)
+                  }))
+                ).then(videoInfo => extendEntity({
+                  ...entityBase,
+                  type: 'video',
+                  video_info: videoInfo,
+                })).catch(error => { // best-effort: failure only produces a warning, the error object itself is not logged
+                  logger.warn('unable to fetch scraped video, ignoring...');
+                });
+              }
+            });
+          })
         ))
           .then(fileurl => {
             if (fileurl) return Message.Image(fileurl);