|  | @@ -2,7 +2,7 @@ import { writeFileSync } from 'fs';
 | 
	
		
			
				|  |  |  import { Readable } from 'stream';
 | 
	
		
			
				|  |  |  import { promisify } from 'util';
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -import axios from 'axios';
 | 
	
		
			
				|  |  | +import axios, { ResponseType as AxiosResponseType } from 'axios';
 | 
	
		
			
				|  |  |  import * as CallableInstance from 'callable-instance';
 | 
	
		
			
				|  |  |  import { XmlEntities } from 'html-entities';
 | 
	
		
			
				|  |  |  import { PNG } from 'pngjs';
 | 
	
	
		
			
				|  | @@ -13,7 +13,7 @@ import * as temp from 'temp';
 | 
	
		
			
				|  |  |  import { getLogger } from './loggers';
 | 
	
		
			
				|  |  |  import { Message } from './koishi';
 | 
	
		
			
				|  |  |  import { MediaEntity, Tweet } from './twitter';
 | 
	
		
			
				|  |  | -import { chainPromises } from './utils';
 | 
	
		
			
				|  |  | +import { chainPromises, M3u8 } from './utils';
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  const xmlEntities = new XmlEntities();
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -30,6 +30,27 @@ const typeInZH = {
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  const logger = getLogger('webshot');
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +const axiosGet = <T extends AxiosResponseType>(url: string, responseType: T, timeout = 150000) => {
 | 
	
		
			
				|  |  | +  logger.info(`fetching ${url}`);
 | 
	
		
			
				|  |  | +  return axios({
 | 
	
		
			
				|  |  | +    method: 'get',
 | 
	
		
			
				|  |  | +    url,
 | 
	
		
			
				|  |  | +    responseType,
 | 
	
		
			
				|  |  | +    timeout,
 | 
	
		
			
				|  |  | +  }).then(res => {
 | 
	
		
			
				|  |  | +    if (res.status === 200) {
 | 
	
		
			
				|  |  | +      logger.info(`successfully fetched ${url}`);
 | 
	
		
			
				|  |  | +      return res.data as {text: string, arraybuffer: ArrayBuffer, [k: string]: any}[T];
 | 
	
		
			
				|  |  | +    } else {
 | 
	
		
			
				|  |  | +      logger.error(`failed to fetch ${url}: ${res.status}`);
 | 
	
		
			
				|  |  | +      throw new Error();
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +  }).catch (err => {
 | 
	
		
			
				|  |  | +    logger.error(`failed to fetch ${url}: ${err instanceof Error ? err.message : err}`);
 | 
	
		
			
				|  |  | +    throw new Error();
 | 
	
		
			
				|  |  | +  });
 | 
	
		
			
				|  |  | +};
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Promise<void>> {
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |    private browser: puppeteer.Browser;
 | 
	
	
		
			
				|  | @@ -115,7 +136,20 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
 | 
	
		
			
				|  |  |              width: width / zoomFactor,
 | 
	
		
			
				|  |  |              height: height / zoomFactor,
 | 
	
		
			
				|  |  |            })
 | 
	
		
			
				|  |  | -            .then(() => page.route('*:\/\/video.twimg.com\/**', route => route.abort()))
 | 
	
		
			
				|  |  | +            .then(() => page.route('*://video.twimg.com/**', route =>
 | 
	
		
			
				|  |  | +              route.abort().then(() => page.evaluate(videoUrl => {
 | 
	
		
			
				|  |  | +                let videoUrls: string[] = window['__scrapedVideoUrls'];
 | 
	
		
			
				|  |  | +                if (!videoUrls) videoUrls = window['__scrapedVideoUrls'] = [];
 | 
	
		
			
				|  |  | +                if (!videoUrls.includes(videoUrl)) {
 | 
	
		
			
				|  |  | +                  videoUrls.push(videoUrl);
 | 
	
		
			
				|  |  | +                  return videoUrl;
 | 
	
		
			
				|  |  | +                }
 | 
	
		
			
				|  |  | +              }, route.request().url())).then(videoUrl => {
 | 
	
		
			
				|  |  | +                if (videoUrl) logger.info(`scraped ${route.request().url()} from page`);
 | 
	
		
			
				|  |  | +              }).catch(err => {
 | 
	
		
			
				|  |  | +                logger.error(`error aborting request to ${route.request().url()}, error: ${err}`);
 | 
	
		
			
				|  |  | +              })
 | 
	
		
			
				|  |  | +            ))
 | 
	
		
			
				|  |  |              .then(gotoUrlAndWaitForTweet)
 | 
	
		
			
				|  |  |              // hide header, "more options" button, like and retweet count
 | 
	
		
			
				|  |  |              .then(() => page.addStyleTag({
 | 
	
	
		
			
				|  | @@ -232,31 +266,15 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
 | 
	
		
			
				|  |  |      );
 | 
	
		
			
				|  |  |    };
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -  private fetchMedia = (url: string): Promise<string> => new Promise<ArrayBuffer>((resolve, reject) => {
 | 
	
		
			
				|  |  | -    logger.info(`fetching ${url}`);
 | 
	
		
			
				|  |  | -    axios({
 | 
	
		
			
				|  |  | -      method: 'get',
 | 
	
		
			
				|  |  | -      url,
 | 
	
		
			
				|  |  | -      responseType: 'arraybuffer',
 | 
	
		
			
				|  |  | -      timeout: 150000,
 | 
	
		
			
				|  |  | -    }).then(res => {
 | 
	
		
			
				|  |  | -      if (res.status === 200) {
 | 
	
		
			
				|  |  | -        logger.info(`successfully fetched ${url}`);
 | 
	
		
			
				|  |  | -        resolve(res.data);
 | 
	
		
			
				|  |  | -      } else {
 | 
	
		
			
				|  |  | -        logger.error(`failed to fetch ${url}: ${res.status}`);
 | 
	
		
			
				|  |  | -        reject();
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  | -    }).catch (err => {
 | 
	
		
			
				|  |  | -      logger.error(`failed to fetch ${url}: ${err instanceof Error ? err.message : err}`);
 | 
	
		
			
				|  |  | -      reject();
 | 
	
		
			
				|  |  | -    });
 | 
	
		
			
				|  |  | -  }).then(data =>
 | 
	
		
			
				|  |  | -    (ext => {
 | 
	
		
			
				|  |  | -      const mediaTempFilePath = temp.path({suffix: `.${ext}`});
 | 
	
		
			
				|  |  | -      writeFileSync(mediaTempFilePath, Buffer.from(data));
 | 
	
		
			
				|  |  | -      const path = `file://${mediaTempFilePath}`;
 | 
	
		
			
				|  |  | -      switch (ext) {
 | 
	
		
			
				|  |  | +  private fetchMedia = (url: string) =>
 | 
	
		
			
				|  |  | +    (url.match(/^file:/) ? Promise.resolve(url) : axiosGet(url, 'arraybuffer').then(data =>
 | 
	
		
			
				|  |  | +      (ext => {
 | 
	
		
			
				|  |  | +        const mediaTempFilePath = temp.path({suffix: `.${ext}`});
 | 
	
		
			
				|  |  | +        writeFileSync(mediaTempFilePath, Buffer.from(data));
 | 
	
		
			
				|  |  | +        return `file://${mediaTempFilePath}`;
 | 
	
		
			
				|  |  | +      })(((/\?format=([a-z]+)&/.exec(url)) ?? (/.*\/.*\.([^?]+)/.exec(url)))[1])
 | 
	
		
			
				|  |  | +    )).then(path => {
 | 
	
		
			
				|  |  | +      switch ((/.*\.(.*?)$/.exec(path) || [])[1]) {
 | 
	
		
			
				|  |  |          case 'jpg':
 | 
	
		
			
				|  |  |          case 'png':
 | 
	
		
			
				|  |  |            return Message.Image(path);
 | 
	
	
		
			
				|  | @@ -265,8 +283,7 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  |        logger.warn('unable to find MIME type of fetched media, failing this fetch');
 | 
	
		
			
				|  |  |        throw Error();
 | 
	
		
			
				|  |  | -    })(((/\?format=([a-z]+)&/.exec(url)) ?? (/.*\/.*\.([^?]+)/.exec(url)))[1])
 | 
	
		
			
				|  |  | -  );
 | 
	
		
			
				|  |  | +    });
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |    public webshot(
 | 
	
		
			
				|  |  |      tweets: Tweet[],
 | 
	
	
		
			
				|  | @@ -305,15 +322,10 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
 | 
	
		
			
				|  |  |        // invoke webshot
 | 
	
		
			
				|  |  |        if (this.mode === 0) {
 | 
	
		
			
				|  |  |          const url = `https://mobile.twitter.com/${twi.user.screen_name}/status/${twi.id_str}`;
 | 
	
		
			
				|  |  | -        const extendEntity = (cardImg: MediaEntity) => {
 | 
	
		
			
				|  |  | -          originTwi.extended_entities = {
 | 
	
		
			
				|  |  | -            ...originTwi.extended_entities,
 | 
	
		
			
				|  |  | -            media: [
 | 
	
		
			
				|  |  | -              ...originTwi.extended_entities?.media ?? [],
 | 
	
		
			
				|  |  | -              cardImg,
 | 
	
		
			
				|  |  | -            ],
 | 
	
		
			
				|  |  | -          };
 | 
	
		
			
				|  |  | -        };
 | 
	
		
			
				|  |  | +        const extendEntity = (cardMedia: MediaEntity) =>
 | 
	
		
			
				|  |  | +          (media => {
 | 
	
		
			
				|  |  | +            if (!media.some(entity => entity.id_str === cardMedia.id_str)) media.push(cardMedia);
 | 
	
		
			
				|  |  | +          })((originTwi.extended_entities ||= {}).media ||= []);
 | 
	
		
			
				|  |  |          const truncateLongThread = (atId: string) => {
 | 
	
		
			
				|  |  |            if (!atId) return;
 | 
	
		
			
				|  |  |            logger.info(`thread too long, truncating at tweet ${atId}...`);
 | 
	
	
		
			
				|  | @@ -340,28 +352,85 @@ class Webshot extends CallableInstance<[Tweet[], (...args) => void, number], Pro
 | 
	
		
			
				|  |  |              document.documentElement.scrollTop = 0;
 | 
	
		
			
				|  |  |            }).then(truncateLongThread),
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -          // scrape card image from main tweet
 | 
	
		
			
				|  |  | -          (_, tweetHandle: puppeteer.ElementHandle<HTMLDivElement>) => tweetHandle.evaluate(div => {
 | 
	
		
			
				|  |  | -            const cardImg = div.querySelector('div[data-testid^="card.layout"][data-testid$=".media"] img');
 | 
	
		
			
				|  |  | -            if (typeof cardImg?.getAttribute('src') === 'string') {
 | 
	
		
			
				|  |  | -              const match = /^(.*\/card_img\/(\d+)\/.+\?format=.*)&name=/.exec(cardImg?.getAttribute('src'));
 | 
	
		
			
				|  |  | -              if (match) {
 | 
	
		
			
				|  |  | -                // tslint:disable-next-line: variable-name
 | 
	
		
			
				|  |  | -                const [media_url_https, id_str] = match.slice(1);
 | 
	
		
			
				|  |  | -                return {
 | 
	
		
			
				|  |  | +          // scrape card media from main tweet
 | 
	
		
			
				|  |  | +          (page, tweetHandle: puppeteer.ElementHandle<HTMLDivElement>) => tweetHandle.evaluate(div => {
 | 
	
		
			
				|  |  | +            const cardMedia = div.querySelector('div[data-testid^="card.layout"][data-testid$=".media"] img, video');
 | 
	
		
			
				|  |  | +            let match: RegExpExecArray;
 | 
	
		
			
				|  |  | +            if (cardMedia?.tagName === 'IMG' && typeof cardMedia?.getAttribute('src') === 'string') {
 | 
	
		
			
				|  |  | +              match = /^(.*\/card_img\/(\d+)\/.+\?format=.*)&name=/.exec(cardMedia?.getAttribute('src'));
 | 
	
		
			
				|  |  | +            }
 | 
	
		
			
				|  |  | +            if (cardMedia?.tagName === 'VIDEO' && typeof cardMedia?.getAttribute('poster') === 'string') {
 | 
	
		
			
				|  |  | +              match = /^(.*\/amplify_video_thumb\/(\d+)\/img\/.*$)/.exec(cardMedia?.getAttribute('poster'));
 | 
	
		
			
				|  |  | +            }
 | 
	
		
			
				|  |  | +            if (match) {
 | 
	
		
			
				|  |  | +              const [media_url_https, id_str] = match.slice(1);
 | 
	
		
			
				|  |  | +              return {
 | 
	
		
			
				|  |  | +                type: cardMedia.tagName,
 | 
	
		
			
				|  |  | +                entityBase: {
 | 
	
		
			
				|  |  |                    media_url: media_url_https.replace(/^https/, 'http'),
 | 
	
		
			
				|  |  |                    media_url_https,
 | 
	
		
			
				|  |  |                    url: '',
 | 
	
		
			
				|  |  |                    display_url: '',
 | 
	
		
			
				|  |  |                    expanded_url: '',
 | 
	
		
			
				|  |  | -                  type: 'photo',
 | 
	
		
			
				|  |  |                    id: Number(id_str),
 | 
	
		
			
				|  |  |                    id_str,
 | 
	
		
			
				|  |  |                    sizes: undefined,
 | 
	
		
			
				|  |  | -                };
 | 
	
		
			
				|  |  | -              }
 | 
	
		
			
				|  |  | +                }
 | 
	
		
			
				|  |  | +              };
 | 
	
		
			
				|  |  |              }
 | 
	
		
			
				|  |  | -          }).then(cardImg => { if (cardImg) extendEntity(cardImg); })
 | 
	
		
			
				|  |  | +            return {};
 | 
	
		
			
				|  |  | +          }).then(({type, entityBase}) => {
 | 
	
		
			
				|  |  | +            if (type === 'IMG') extendEntity({
 | 
	
		
			
				|  |  | +              ...entityBase,
 | 
	
		
			
				|  |  | +              type: 'photo',
 | 
	
		
			
				|  |  | +            });
 | 
	
		
			
				|  |  | +            if (type === 'VIDEO') page.evaluate(
 | 
	
		
			
				|  |  | +              id_str => (window['__scrapedVideoUrls'] as string[])?.filter(videoUrl =>
 | 
	
		
			
				|  |  | +                new RegExp(`.*/amplify_video/${id_str}.*\\.m3u8(?:\\?|$)`).exec(videoUrl)
 | 
	
		
			
				|  |  | +              ),
 | 
	
		
			
				|  |  | +              entityBase.id_str
 | 
	
		
			
				|  |  | +            ).then(videoUrls => {
 | 
	
		
			
				|  |  | +              if (videoUrls && videoUrls.length) {
 | 
	
		
			
				|  |  | +                Promise.all(videoUrls.map(streamlistUrl =>
 | 
	
		
			
				|  |  | +                  axiosGet(streamlistUrl, 'text')
 | 
	
		
			
				|  |  | +                    .then(streamlist => M3u8.parseStreamlist(streamlist)[0])
 | 
	
		
			
				|  |  | +                    .then(({bandwidth, playlistPath, resolution}) => {
 | 
	
		
			
				|  |  | +                      const [width, height] = /(.*)x(.*)/.exec(resolution).slice(1).map(Number);
 | 
	
		
			
				|  |  | +                      const playlistUrl = new URL(playlistPath, streamlistUrl);
 | 
	
		
			
				|  |  | +                      const mediaTempFilePath = temp.path({suffix: `.mp4`});
 | 
	
		
			
				|  |  | +                      return axiosGet(playlistUrl.href, 'text')
 | 
	
		
			
				|  |  | +                        .then(playlist => M3u8.parsePlaylist(playlist))
 | 
	
		
			
				|  |  | +                        .then(({duration, segmentPaths}) =>
 | 
	
		
			
				|  |  | +                          chainPromises(segmentPaths.map(path => () =>
 | 
	
		
			
				|  |  | +                            axiosGet(new URL(path, playlistUrl).href, 'arraybuffer').then(data => {
 | 
	
		
			
				|  |  | +                              writeFileSync(mediaTempFilePath, Buffer.from(data), {flag: 'a'});
 | 
	
		
			
				|  |  | +                            })
 | 
	
		
			
				|  |  | +                          )).then(() => ({
 | 
	
		
			
				|  |  | +                            duration_millis: duration * 1000,
 | 
	
		
			
				|  |  | +                            aspect_ratio: [width, height],
 | 
	
		
			
				|  |  | +                            variants: [{
 | 
	
		
			
				|  |  | +                              bitrate: bandwidth,
 | 
	
		
			
				|  |  | +                              content_type: 'video/mp4',
 | 
	
		
			
				|  |  | +                              url: `file://${mediaTempFilePath}`,
 | 
	
		
			
				|  |  | +                            }]
 | 
	
		
			
				|  |  | +                          }) as MediaEntity['video_info'])
 | 
	
		
			
				|  |  | +                        )
 | 
	
		
			
				|  |  | +                    })
 | 
	
		
			
				|  |  | +                )).then(videoInfos =>
 | 
	
		
			
				|  |  | +                  videoInfos.reduce((vi1, vi2) => ({
 | 
	
		
			
				|  |  | +                    ...vi1,
 | 
	
		
			
				|  |  | +                    variants: vi1.variants.concat(vi2.variants)
 | 
	
		
			
				|  |  | +                  }))
 | 
	
		
			
				|  |  | +                ).then(videoInfo => extendEntity({
 | 
	
		
			
				|  |  | +                  ...entityBase,
 | 
	
		
			
				|  |  | +                  type: 'video',
 | 
	
		
			
				|  |  | +                  video_info: videoInfo,
 | 
	
		
			
				|  |  | +                })).catch(error => {
 | 
	
		
			
				|  |  | +                  logger.warn('unable to fetch scraped video, ignoring...');
 | 
	
		
			
				|  |  | +                });
 | 
	
		
			
				|  |  | +              }
 | 
	
		
			
				|  |  | +            });
 | 
	
		
			
				|  |  | +          })
 | 
	
		
			
				|  |  |          ))
 | 
	
		
			
				|  |  |            .then(fileurl => {
 | 
	
		
			
				|  |  |              if (fileurl) return Message.Image(fileurl);
 |