import { writeFileSync } from 'fs';
import { Readable } from 'stream';
import { promisify } from 'util';

import axios from 'axios';
import * as CallableInstance from 'callable-instance';
import { XmlEntities } from 'html-entities';
import { PNG } from 'pngjs';
import * as puppeteer from 'playwright';
import * as sharp from 'sharp';
import * as temp from 'temp';

import { getLogger } from './loggers';
import { Message } from './koishi';
import { chainPromises } from './utils';
import { linkBuilder, MediaItem, LazyMediaItem, WebshotHelpers } from './twitter';

const xmlEntities = new XmlEntities();

/**
 * Wraps a label in a `String` subclass instance: `.type` holds the raw label,
 * while `toString()` yields the label surrounded by square brackets.
 */
const ZHType = (type: string) => new class extends String {
  public type = super.toString();
  public toString = () => `[${super.toString()}]`;
}(type);

/** Chinese display labels for the supported media kinds ("picture" / "video"). */
const typeInZH = {
  photo: ZHType('图片'),
  video: ZHType('视频'),
};

const logger = getLogger('webshot');

export type Page = puppeteer.Page;
export type Cookies = puppeteer.Cookie[];

/**
 * Escapes a value for embedding inside a double-quoted CSS string literal.
 * Backslashes and double quotes are backslash-escaped and line breaks become
 * the CSS escape `\a `, so the value cannot terminate the literal early.
 */
const escapeCssString = (value: string): string =>
  String(value).replace(/["\\]/g, '\\$&').replace(/\r?\n/g, '\\a ');

/**
 * Turns media items into chat messages: a rendered screenshot of the post,
 * the post text, and/or the attached media files, depending on `mode`.
 *
 * As read from the code below:
 *  - `mode === 0`: a browser connection is opened and posts are screenshotted;
 *  - `mode > 0`: author and caption text are emitted instead of a screenshot;
 *  - media files are downloaded whenever `1 - mode % 2` is truthy (e.g. 0, 2).
 *
 * The instance is created through `CallableInstance` with the method name
 * `'webshot'` (see the callable-instance documentation for the call semantics).
 */
class Webshot extends CallableInstance<[LazyMediaItem[], (...args) => void, number], Promise<void>> {

  private browser: puppeteer.Browser;
  private mode: number;
  private wsUrl: string;
  private getCookies: () => Cookies;

  /**
   * @param wsUrl HTTP endpoint that returns the browser websocket endpoint(s)
   * @param mode operating mode; any truthy value skips the browser connection
   * @param getCookies supplier of the cookies added to every page context
   * @param onready invoked once the instance is usable; in browser mode it
   *   receives `performOnNewPage` and is called again after each reconnection
   */
  constructor(
    wsUrl: string,
    mode: number,
    getCookies: () => Cookies,
    onready: (doOnNewPage?: typeof Webshot.prototype.performOnNewPage) => void
  ) {
    super('webshot');
    this.mode = mode;
    if (mode) {
      if (onready) onready();
    } else {
      this.getCookies = getCookies;
      this.wsUrl = wsUrl;
      this.connect(() => onready && onready(this.performOnNewPage));
    }
  }

  /**
   * Fetches the websocket endpoint map from `wsUrl`, connects to the first
   * browser type listed in it and stores the browser. Any failure is routed
   * to `reconnect`, so the returned promise keeps retrying.
   */
  private connect = (onready?: (...args) => void): Promise<void> =>
    axios.get<{[key in 'chromium' | 'firefox' | 'webkit']?: string}>(this.wsUrl)
      .then(res => {
        logger.info(`received websocket endpoint: ${JSON.stringify(res.data)}`);
        const browserType = Object.keys(res.data)[0] as keyof typeof res.data;
        return (puppeteer[browserType] as puppeteer.BrowserType<puppeteer.Browser>)
          .connect({wsEndpoint: res.data[browserType]});
      })
      .then(browser => this.browser = browser)
      .then(() => {
        logger.info('launched puppeteer browser');
        if (onready) return onready();
      })
      .catch(error => this.reconnect(error, onready));

  /** Logs the error, waits 2.5 seconds and then calls `connect` again. */
  private reconnect = (error, onready?: (...args) => void) => {
    logger.error(`connection error, reason: ${error}`);
    logger.warn('trying to reconnect in 2.5s...');
    return promisify(setTimeout)(2500)
      .then(() => this.connect(onready));
  };

  /**
   * Opens a new page with fixed locale, timezone and user agent, then runs
   * `action` on it.
   *
   * @param action work to perform on the page; its result is passed through
   * @param zoomFactor device scale factor of the new page
   * @param reconnectOnError when true, a failure triggers `reconnect` followed
   *   by a retry of the same action; when false the error is rethrown
   */
  private performOnNewPage = <T>(
    action: (page: Page) => T | PromiseLike<T>,
    zoomFactor = 2,
    reconnectOnError = true
  ): Promise<T> =>
    this.browser.newPage({
      bypassCSP: true,
      deviceScaleFactor: zoomFactor,
      locale: 'ja-JP',
      timezoneId: 'Asia/Tokyo',
      userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    }).then(action)
      .catch(error => {
        if (reconnectOnError) {
          return this.reconnect(error)
            .then((): Promise<T> => this.performOnNewPage(action, zoomFactor, reconnectOnError));
        }
        throw error;
      });

  /**
   * Renders `url` in a new page, screenshots it and stores the result as a
   * temporary JPEG file.
   *
   * The screenshot is scanned upwards from the bottom for a boundary row. If
   * one is found the image is cropped there; otherwise the render is repeated
   * with the height increased by 3840 until `height >= 8 * 1920`.
   *
   * @param url page to render
   * @param height screenshot height in device pixels
   * @param webshotDelay overall time budget in milliseconds for loading
   * @param morePostProcessings extra page manipulations run before shooting
   * @returns a `file://` URL of the JPEG, or `''` if the page could not be
   *   loaded. Other errors cause a reconnect followed by a retry.
   */
  private renderWebshot = (
    url: string,
    height: number,
    webshotDelay: number,
    ...morePostProcessings: ((page: Page) => Promise<unknown>)[]
  ): Promise<string> => {
    temp.track();
    const jpeg = (data: Readable) => data.pipe(sharp()).jpeg({quality: 90, trellisQuantisation: true});
    // Return the promise from toFile directly so a failed write rejects
    // instead of leaving the caller waiting on a promise that never settles.
    const sharpToFile = (pic: sharp.Sharp): Promise<string> => {
      const webshotTempFilePath = temp.path({suffix: '.jpg'});
      return pic.toFile(webshotTempFilePath).then(() => `file://${webshotTempFilePath}`);
    };
    const promise = new Promise<{ path: string, boundary: null | number }>((resolve, reject) => {
      const width = 720;
      const zoomFactor = 2;
      logger.info(`shooting ${width}*${height} webshot for ${url}`);
      this.performOnNewPage(
        page => {
          const startTime = new Date().getTime();
          const getTimerTime = () => new Date().getTime() - startTime;
          // Remaining time budget, at least 500 ms; 0 while a login is pending
          // (Playwright treats a timeout of 0 as "no timeout").
          const getTimeout = () => WebshotHelpers.isWaitingForLogin
            ? 0
            : Math.max(500, webshotDelay - getTimerTime());
          // The chain is returned so that an error rethrown below reaches the
          // `.catch(reject)` on performOnNewPage rather than being lost as an
          // unhandled rejection with the outer promise left pending.
          return page.setViewportSize({
            width: width / zoomFactor,
            height: height / zoomFactor,
          }).then(() => page.context().addCookies(this.getCookies()))
            .then(() => page.goto(url, {waitUntil: 'load', timeout: getTimeout()}))
            .then(() => WebshotHelpers.handleCookieConsent(page))
            // Race "login handled, page reloaded, article found" against
            // "article found directly" and continue with whichever settles first.
            .then(() => ((next: () => Promise<unknown>) => Promise.race([
              WebshotHelpers.handleLogin(page)
                .then(() => page.goto(url, {waitUntil: 'load', timeout: getTimeout()}))
                .then(next),
              next(),
            ]))(() => promisify(setTimeout)(2000)
              .then(() => page.waitForSelector('article', {timeout: getTimeout()})))
            )
            .catch((err: Error): null => {
              // A navigation timeout is tolerated: shoot whatever has loaded.
              if (err.name !== 'TimeoutError') throw err;
              logger.warn(`navigation timed out at ${getTimerTime()} ms`);
              return null;
            })
            // hide header, footer, "more options" button, like and share count, avatar stories/live indicator
            // person tags, carousel navigator, 2nd avatar, and comments; adjust layout for center alignment
            .then(() => page.addStyleTag({content:
              'nav,footer,main>*>*+*,header+div,header~div>div>div+div,main button,canvas,main section,main section+div>ul>:not(div),' +
              'main section+div>ul>div [role="button"],header~div [tabindex="0"]>*>[tabindex="-1"]~div{display:none!important} ' +
              'section+div{overflow:hidden} section+*>*{position:relative!important} article{border-bottom:1px solid!important} ' +
              'main section+div>ul>div>li{padding:6px 2px 12px!important}',
            }))
            .then(() => page.addStyleTag({
              content: '*{font-family:-apple-system,".Helvetica Neue DeskInterface",Hiragino Sans,Hiragino Sans GB,sans-serif!important}',
            }))
            // display absolute date and time and remove "comment limited" notice
            .then(() => page.evaluate(() => {
              let time: HTMLTimeElement;
              time = document.querySelector('div>div>time');
              if (time) time.parentElement.parentElement.style.display = 'none';
              time = document.querySelector('main section~div>a>time');
              if (time) {
                time.innerHTML = time.title + ' ' + new Date(time.dateTime).toLocaleTimeString().slice(0, -3);
                time.parentElement.parentElement.style.margin = '-24px 2px 12px';
                const element = time.parentElement.parentElement.nextElementSibling as HTMLElement;
                if (element) element.style.display = 'none';
              }
            }))
            .then(() => chainPromises(morePostProcessings.map(func => () => func(page))))
            // spend whatever is left of the time budget before shooting
            .then(() => promisify(setTimeout)(getTimeout()))
            .then(() => page.screenshot())
            .then(screenshot => {
              new PNG({
                filterType: 4,
                deflateLevel: 0,
              })
                // without a listener a parse failure would be an uncaught 'error' event
                .on('error', reject)
                .on('parsed', function () {
                  // byte offset of pixel (x, y), 4 bytes per pixel
                  const idx = (x: number, y: number) => (this.width * y + x) << 2;
                  let boundary: number = null;
                  // Scan upwards from the bottom (at most 3840 rows) for a row whose
                  // first channel near the left edge is <= 38, matches the one near
                  // the right edge, and whose neighbours below/above agree.
                  for (let y = this.height - 1; y > this.height - 3840; y -= zoomFactor) {
                    if (
                      this.data[idx(zoomFactor, y)] <= 38 &&
                      this.data[idx(zoomFactor, y)] === this.data[idx(this.width - zoomFactor, y)] &&
                      this.data[idx(zoomFactor, y + zoomFactor)] === this.data[idx(zoomFactor, y - 2 * zoomFactor)]
                    ) {
                      boundary = y - 1;
                      break;
                    }
                  }
                  if (boundary !== null) {
                    logger.info(`found boundary at ${boundary}, cropping image`);
                    this.data = this.data.slice(0, idx(this.width, boundary));
                    this.height = boundary;
                    sharpToFile(jpeg(this.pack())).then(path => {
                      logger.info(`finished webshot for ${url}`);
                      resolve({path, boundary});
                    }).catch(reject);
                  } else if (height >= 8 * 1920) {
                    logger.warn('too large, consider as a bug, returning');
                    sharpToFile(jpeg(this.pack())).then(path => {
                      resolve({path, boundary: 0});
                    }).catch(reject);
                  } else {
                    logger.info('unable to find boundary, try shooting a larger image');
                    resolve({path: '', boundary});
                  }
                })
                .parse(screenshot);
            })
            .catch(err => {
              if (err instanceof Error && err.name !== 'TimeoutError') throw err;
              logger.error(`error shooting webshot for ${url}, could not load web page of tweet`);
              resolve({path: '', boundary: 0});
            })
            .finally(() => { page.close(); });
        },
        zoomFactor,
        false
      ).catch(reject);
    });
    return promise.then(data => {
      if (data.boundary === null) {
        // no boundary found yet: retry with a taller viewport
        return this.renderWebshot(url, height + 3840, webshotDelay, ...morePostProcessings);
      } else return data.path;
    }).catch(error => this.reconnect(error)
      .then(() => this.renderWebshot(url, height, webshotDelay, ...morePostProcessings))
    );
  };

  /**
   * Downloads `url` into a temporary file and wraps the file URL in an image
   * or video message, chosen by the extension found before the `?` of the URL.
   *
   * @returns the message for the downloaded file
   * @throws (rejects) when the download fails, the status is not 200, or the
   *   extension is missing or not one of jpg/png/mp4
   */
  private fetchMedia = (url: string): Promise<string> => new Promise<ArrayBuffer>((resolve, reject) => {
    logger.info(`fetching ${url}`);
    axios({
      method: 'get',
      url,
      responseType: 'arraybuffer',
      timeout: 150000,
    }).then(res => {
      if (res.status === 200) {
        logger.info(`successfully fetched ${url}`);
        resolve(res.data);
      } else {
        logger.error(`failed to fetch ${url}: ${res.status}`);
        reject(new Error(`failed to fetch ${url}: ${res.status}`));
      }
    }).catch(err => {
      const reason = err instanceof Error ? err.message : err;
      logger.error(`failed to fetch ${url}: ${reason}`);
      reject(new Error(`failed to fetch ${url}: ${reason}`));
    });
  }).then(data => {
    // `exec` returns null when the URL has no ".<ext>?" part; treat that the
    // same as an unsupported extension instead of dereferencing null.
    const ext = /\/.*\.(.+?)\?/.exec(url)?.[1];
    let toMessage: (path: string) => string;
    switch (ext) {
      case 'jpg':
      case 'png':
        toMessage = Message.Image;
        break;
      case 'mp4':
        toMessage = Message.Video;
        break;
      default:
        logger.warn('unable to find MIME type of fetched media, failing this fetch');
        throw new Error(`unsupported media type for ${url}`);
    }
    // only write the temp file once the extension is known to be usable
    const mediaTempFilePath = temp.path({suffix: `.${ext}`});
    writeFileSync(mediaTempFilePath, Buffer.from(data));
    return toMessage(`file://${mediaTempFilePath}`);
  });

  /**
   * Processes the given items one after another and reports each finished
   * message chain through `callback`.
   *
   * @param lazyMediaItems items whose `item` function yields the media item
   * @param callback receives the message chain, the entity-decoded caption
   *   text and the author line for every item
   * @param webshotDelay time budget in milliseconds passed to `renderWebshot`
   * @returns a promise that settles after the last item has been processed
   */
  public webshot(
    lazyMediaItems: LazyMediaItem[],
    callback: (msgs: string, text: string, author: string) => void,
    webshotDelay: number
  ): Promise<void> {
    let grandPromise = Promise.resolve();
    for (const lazyItem of lazyMediaItems) {
      grandPromise = grandPromise.then(lazyItem.item).then(item => {
        let promise = Promise.resolve();
        promise = promise.then(() => {
          logger.info(`working on ${item.user.username}/${item.code}`);
        });
        let messageChain = '';

        // text processing
        const author = `${item.user.full_name} (@${item.user.username}):\n`;
        const text = item.caption?.text || '';
        if (this.mode > 0) messageChain += (author + xmlEntities.decode(text));

        // invoke webshot
        if (this.mode === 0) {
          const url = linkBuilder({postUrlSegment: item.code});
          promise = promise.then(() => this.renderWebshot(url, 3840, webshotDelay, page =>
            // display full name; it is escaped because it is embedded in a
            // CSS string literal and comes from remote data
            page.addStyleTag({content:
              'header>div>div+div{font-size:12px; line-height:15px; padding-top:0!important}' +
              `header>div>div+div::before{content:"${escapeCssString(item.user.full_name)}"; color:#8e8e8e; font-weight:bold}`,
            })
          ))
            .then(fileurl => {
              if (fileurl) return Message.Image(fileurl);
              // fall back to plain text when no screenshot could be produced
              return author + text;
            })
            .then(msg => {
              if (msg) messageChain += msg;
            });
        }

        // fetch extra entities
        const type = (mediaItem): keyof typeof typeInZH =>
          (mediaItem as MediaItem).video_versions ? 'video' : 'photo';
        const fetchBestCandidate = (
          candidates: (Partial<typeof item.video_versions[0]> & typeof item.image_versions2.candidates[0])[],
          mediaType: keyof typeof typeInZH
        ) => {
          // sort a copy so the item's own candidate list is left untouched
          const url = [...candidates]
            .sort((var1, var2) => var2.width + (var2?.type || 0) - var1.width - (var1?.type || 0))
            .map(variant => variant.url)[0]; // largest media
          const altMessage = `\n[失败的${typeInZH[mediaType].type}:${url}]`;
          return this.fetchMedia(url)
            .catch(() => {
              logger.warn('unable to fetch media, sending plain text instead...');
              return altMessage;
            })
            .then(msg => {
              messageChain += msg;
            });
        };
        // truthy whenever `mode % 2` is not exactly 1 (e.g. modes 0 and 2)
        if (1 - this.mode % 2) {
          promise = promise.then(() => {
            if (item.carousel_media) {
              return chainPromises(item.carousel_media.map(carouselItem =>
                () => fetchBestCandidate(
                  (carouselItem as unknown as MediaItem).video_versions || carouselItem.image_versions2.candidates,
                  type(carouselItem)
                )
              ));
            } else if (item.video_versions) {
              return fetchBestCandidate(item.video_versions, type(item));
            } else if (item.image_versions2) {
              return fetchBestCandidate(item.image_versions2.candidates, type(item));
            }
          });
        }
        // This continuation is not assigned back to `promise`, so the promise
        // returned below neither waits for nor fails with the callback.
        promise.then(() => {
          logger.info(`done working on ${item.user.username}/${item.code}, message chain:`);
          logger.info(JSON.stringify(Message.ellipseBase64(messageChain)));
          callback(messageChain, xmlEntities.decode(text), author);
        });
        return promise;
      });
    }
    return grandPromise;
  }
}

export default Webshot;