"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const fs = require("fs");
const util_1 = require("util");
const axios_1 = require("axios");
const CallableInstance = require("callable-instance");
const html_entities_1 = require("html-entities");
const pngjs_1 = require("pngjs");
// NOTE: despite the binding name, the module loaded here is Playwright.
const puppeteer = require("playwright");
const sharp = require("sharp");
const temp = require("temp");
const loggers_1 = require("./loggers");
const koishi_1 = require("./koishi");
const utils_1 = require("./utils");
const xmlEntities = new html_entities_1.XmlEntities();
// Labels used in outgoing messages for each media entity type.
const typeInZH = {
    photo: '图片',
    video: '视频',
    animated_gif: 'GIF',
};
const logger = (0, loggers_1.getLogger)('webshot');
/**
 * Replaces every literal occurrence of `search` in `text` with `replacement`.
 *
 * Unlike `text.replace(new RegExp(search, 'g'), replacement)`, neither regex
 * metacharacters in `search` nor `$`-patterns in `replacement` are interpreted,
 * which matters because both values are URLs here.
 *
 * @param {string} text - the string to search in
 * @param {string} search - literal substring to look for; empty/missing is a no-op
 * @param {string} replacement - literal replacement text
 * @returns {string} the resulting string
 */
const replaceLiteral = (text, search, replacement) => {
    if (!search)
        return text;
    return text.split(search).join(replacement);
};
/**
 * Performs an HTTP GET request through axios and resolves with the response body.
 *
 * @param {string} url - the URL to fetch
 * @param {string} responseType - axios `responseType` (this file uses 'arraybuffer' and 'text')
 * @param {number} [timeout=150000] - axios request timeout in milliseconds
 * @returns {Promise<*>} resolves with `res.data`; rejects with an `Error` whose
 *   message describes the failure (the underlying error is attached as `cause`)
 */
const axiosGet = (url, responseType, timeout = 150000) => {
    logger.info(`fetching ${url}`);
    return (0, axios_1.default)({
        method: 'get',
        url,
        responseType,
        timeout,
    }).then(res => {
        if (res.status !== 200)
            throw new Error(`unexpected HTTP status ${res.status}`);
        logger.info(`successfully fetched ${url}`);
        return res.data;
    }).catch(err => {
        const reason = err instanceof Error ? err.message : err;
        logger.error(`failed to fetch ${url}: ${reason}`);
        const wrapped = new Error(`failed to fetch ${url}: ${reason}`);
        wrapped.cause = err;
        throw wrapped;
    });
};
/**
 * Renders tweets into message chains, either as page screenshots taken through a
 * remote Playwright browser or as plain text, and downloads attached media.
 *
 * The instance is made callable by `CallableInstance`, dispatching to `webshot`.
 */
class Webshot extends CallableInstance {
    /**
     * @param {string} wsUrl - HTTP endpoint that returns a JSON object mapping a
     *   Playwright browser type name to its websocket endpoint (only used when `mode` is falsy)
     * @param {number} mode - 0 takes screenshots; any truthy value skips the browser
     *   connection entirely (see `webshot` for how 1 and 2 differ)
     * @param {Function} [onready] - invoked once the instance is usable
     */
    constructor(wsUrl, mode, onready) {
        super('webshot');
        /**
         * Fetches the websocket endpoint from `this.wsUrl`, connects to the browser
         * and stores it in `this.browser`. Any failure is routed to `reconnect`.
         */
        this.connect = (onready) => axios_1.default.get(this.wsUrl)
            .then(res => {
                logger.info(`received websocket endpoint: ${JSON.stringify(res.data)}`);
                // The first key of the response names the Playwright browser type to use.
                const browserType = Object.keys(res.data)[0];
                return puppeteer[browserType]
                    .connect({ wsEndpoint: res.data[browserType] });
            })
            .then(browser => this.browser = browser)
            .then(() => {
                logger.info('launched puppeteer browser');
                if (onready)
                    return onready();
            })
            .catch(error => this.reconnect(error, onready));
        /**
         * Logs the error, waits 2.5 seconds and calls `connect` again.
         */
        this.reconnect = (error, onready) => {
            logger.error(`connection error, reason: ${error}`);
            logger.warn('trying to reconnect in 2.5s...');
            return (0, util_1.promisify)(setTimeout)(2500)
                .then(() => this.connect(onready));
        };
        /**
         * Opens `url` in a new page, isolates the main tweet and screenshots it.
         *
         * @param {string} url - tweet page URL
         * @param {number} height - viewport height in device pixels (divided by the zoom factor)
         * @param {number} webshotDelay - overall time budget in milliseconds used to derive timeouts
         * @param {...Function} morePostProcessings - `(page, tweetHandle) => Promise` hooks
         *   run sequentially before the screenshot is taken
         * @returns {Promise<string>} `file://` URI of the JPEG, or '' when the tweet
         *   could not be loaded in time. Other errors trigger a reconnect and a retry.
         */
        this.renderWebshot = (url, height, webshotDelay, ...morePostProcessings) => {
            temp.track();
            const jpeg = (data) => data.pipe(sharp()).jpeg({ quality: 90, trellisQuantisation: true });
            // Writes a sharp pipeline to a temp .jpg and resolves with its file:// URI.
            // Rejects when writing fails so that callers never wait on a dead promise.
            const sharpToFile = (pic) => new Promise((resolveFile, rejectFile) => {
                const webshotTempFilePath = temp.path({ suffix: '.jpg' });
                pic.toFile(webshotTempFilePath)
                    .then(() => resolveFile(`file://${webshotTempFilePath}`), rejectFile);
            });
            const promise = new Promise((resolve, reject) => {
                const width = 720;
                const zoomFactor = 2;
                logger.info(`shooting ${width}*${height} webshot for ${url}`);
                this.browser.newPage({
                    bypassCSP: true,
                    deviceScaleFactor: zoomFactor,
                    locale: 'ja-JP',
                    timezoneId: 'Asia/Tokyo',
                    userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
                })
                    .then(page => {
                        const startTime = new Date().getTime();
                        const getTimerTime = () => new Date().getTime() - startTime;
                        // Remaining time budget, never less than 500 ms.
                        const getTimeout = () => Math.max(500, webshotDelay - getTimerTime());
                        // Navigates to the tweet and waits for whichever comes first: an
                        // <article>, a clickable "Try again" input, or a retry button
                        // (followed by a reload). Timeouts are retried while budget remains.
                        const gotoUrlAndWaitForTweet = () => page.goto(url, { waitUntil: 'load', timeout: Math.min(10000, getTimeout()) })
                            .then(() => Promise.race([
                                page.waitForSelector('article', { state: 'attached', timeout: Math.min(10000, getTimeout()) }),
                                page.click('#placeholder+#ScriptLoadFailure input[value="Try again"]', { timeout: getTimeout() }),
                                page.waitForSelector('div[role="button"]>div>span>:text-matches("^やりなおす|更新$")', { state: 'attached', timeout: getTimeout() }).then(() => page.reload({ timeout: getTimeout() })),
                            ]))
                            .catch(err => {
                                if (err.name === 'TimeoutError' && webshotDelay > getTimerTime()) {
                                    logger.warn(`navigation timed out after ${getTimerTime()} ms, retrying...`);
                                    return gotoUrlAndWaitForTweet();
                                }
                                throw err;
                            });
                        // The chain is returned so that a rethrown error reaches `.catch(reject)`
                        // below instead of leaving the outer promise pending forever.
                        return page.setViewportSize({
                            width: width / zoomFactor,
                            height: height / zoomFactor,
                        })
                            // Abort video requests, but remember their URLs inside the page so
                            // that a post-processing hook can download the stream later.
                            .then(() => page.route('*://video.twimg.com/**', route => route.abort().then(() => page.evaluate(videoUrl => {
                                let videoUrls = window['__scrapedVideoUrls'];
                                if (!videoUrls)
                                    videoUrls = window['__scrapedVideoUrls'] = [];
                                if (!videoUrls.includes(videoUrl)) {
                                    videoUrls.push(videoUrl);
                                    return videoUrl;
                                }
                            }, route.request().url())).then(videoUrl => {
                                if (videoUrl)
                                    logger.info(`scraped ${route.request().url()} from page`);
                            }).catch(err => {
                                logger.error(`error aborting request to ${route.request().url()}, error: ${err}`);
                            })))
                            .then(gotoUrlAndWaitForTweet)
                            .then(() => page.addStyleTag({
                                content: 'header,#layers{display:none!important}article{background-color:transparent!important}' +
                                    '[data-testid="caret"],[role="group"],[dir] div>a:nth-last-child(3)~span,' +
                                    '[data-testid="tweet"] [class*=" "]+:last-child>*+[class*=" "]+[role="group"]~div{display:none!important}',
                            }))
                            .then(() => page.addStyleTag({
                                content: '*{font-family:-apple-system,".Helvetica Neue DeskInterface",' +
                                    'Hiragino Sans,Hiragino Sans GB,sans-serif!important}' +
                                    '*{-webkit-font-smoothing:antialiased!important;-webkit-locale:"ja"}',
                            }))
                            .then(() => page.evaluate(() => {
                                // Polls every 250 ms; once a placementTracking container holds a
                                // button with an svg, its markup is re-assigned and polling stops.
                                const poll = setInterval(() => {
                                    document.querySelectorAll('div[data-testid="placementTracking"]').forEach(container => {
                                        if (container.querySelector('div[role="button"] svg')) {
                                            container.innerHTML = container.innerHTML;
                                            clearInterval(poll);
                                        }
                                    });
                                }, 250);
                            }))
                            .then(() => page.waitForSelector('xpath=//section/*/*/div[.//article//a[@aria-describedby]/time]', { state: 'attached', timeout: getTimeout() }))
                            // Tag the main tweet container and hide everything after it.
                            .then(handle => handle.evaluate(div => div.classList.add('mainTweet'))
                                .then(() => page.addStyleTag({ content: 'div.mainTweet~div{display:none;}' }))
                                .then(() => handle))
                            .then(handle => handle.$$('xpath=(.|preceding-sibling::*)//a[contains(@href,"content_you_see")]/../../..//*[@role="button"]')
                                .then(sensitiveToggles => {
                                    const count = sensitiveToggles.length;
                                    if (count)
                                        logger.info(`found ${count} sensitive ${count === 1 ? 'tweet' : 'tweets'} on page, uncollapsing...`);
                                    return (0, utils_1.chainPromises)(sensitiveToggles.map(toggle => () => toggle.click()));
                                })
                                .then(() => handle))
                            .then(handle => handle.$('[data-testid="tweet"]').then(owner => owner ? handle : null))
                            .catch((err) => {
                                if (err.name !== 'TimeoutError')
                                    throw err;
                                // On timeout, dump the page HTML and a screenshot for debugging,
                                // then continue with a null handle.
                                logger.warn(`${err} (${getTimerTime()} ms)`);
                                return page.evaluate(() => document.documentElement.outerHTML).then(html => {
                                    const path = temp.path({ suffix: '.html' });
                                    fs.writeFileSync(path, html);
                                    logger.warn(`saved debug html to ${path}`);
                                }).then(() => page.route('**/*', route => route.abort().catch(() => { }))).then(() => page.screenshot({ fullPage: true })).then(screenshot => {
                                    // Best effort and deliberately not awaited; failures are only logged.
                                    sharpToFile(sharp(screenshot).jpeg({ quality: 90 })).then(fileUri => {
                                        logger.warn(`saved debug screenshot to ${fileUri.slice(7)}`);
                                    }).catch(error => {
                                        logger.warn(`unable to save debug screenshot, error: ${error}`);
                                    });
                                }).then(() => null);
                            })
                            .then(handle => {
                                if (handle === null)
                                    throw new puppeteer.errors.TimeoutError();
                                let cropTop;
                                return (0, utils_1.chainPromises)(morePostProcessings.map(func => () => func(page, handle)))
                                    .then(() => (0, util_1.promisify)(setTimeout)(getTimeout()))
                                    .then(() => page.evaluate(() => document.documentElement.scrollTop))
                                    .then(scrollTop => { cropTop = scrollTop * zoomFactor; })
                                    .then(() => page.evaluate(() => document.activeElement.blur()))
                                    .then(() => handle.evaluateHandle(div => {
                                        // Stretch the parent to the bottom edge of the main tweet so
                                        // that the element screenshot covers the tweet completely.
                                        const minHeight = Number(div.style.transform.match(/translateY\((.+)px\)/)[1]) + div.offsetHeight;
                                        const parentDiv = div.parentElement;
                                        parentDiv.setAttribute('style', `min-height: ${minHeight}px; margin: 0 -1px; padding: 0 1px`);
                                        return parentDiv;
                                    }))
                                    .catch(err => {
                                        logger.error(`error while parsing content height, failing this webshot`);
                                        throw err;
                                    })
                                    .then(parentDivHandle => parentDivHandle.screenshot())
                                    .then(screenshot => [screenshot, cropTop]);
                            })
                            .then(([screenshot, cropTop]) => {
                                new pngjs_1.PNG({
                                    filterType: 4,
                                    deflateLevel: 0,
                                }).on('parsed', function () {
                                    let png = this;
                                    if (cropTop > 0) {
                                        logger.info(`cropping screenshot at y offset ${cropTop}...`);
                                        png = new pngjs_1.PNG({ width: this.width, height: this.height - cropTop });
                                        this.bitblt(png, 0, cropTop, png.width, png.height, 0, 0);
                                    }
                                    sharpToFile(jpeg(png.pack())).then(path => {
                                        logger.info(`finished webshot for ${url}`);
                                        resolve({ path, boundary: png.height });
                                    }).catch(reject);
                                }).on('error', reject).parse(screenshot);
                            })
                            .catch(err => {
                                if (err instanceof Error && err.name !== 'TimeoutError')
                                    throw err;
                                logger.error(`error shooting webshot for ${url}, could not load web page of tweet`);
                                resolve({ path: '', boundary: 0 });
                            })
                            .finally(() => {
                                page.close().catch(error => {
                                    logger.warn(`error closing page, error: ${error}`);
                                });
                            });
                    })
                    .catch(reject);
            });
            return promise.then(data => {
                if (data.boundary === null) {
                    return this.renderWebshot(url, height + 1920, webshotDelay, ...morePostProcessings);
                }
                else
                    return data.path;
            }).catch(error => this.reconnect(error)
                .then(() => this.renderWebshot(url, height, webshotDelay, ...morePostProcessings)));
        };
        /**
         * Downloads a media file to a temp path (unless it already is a `file:` URI)
         * and wraps it into an image or video message according to its extension.
         *
         * @param {string} url - remote URL or `file:` URI of the media
         * @returns {Promise<*>} resolves with `Message.Image(path)` for jpg/png or
         *   `Message.Video(path)` for mp4/ts; rejects for anything else
         */
        this.fetchMedia = (url) => {
            // Reject instead of throwing synchronously so that callers' `.catch` applies.
            if (typeof url !== 'string' || !url)
                return Promise.reject(new Error('no media URL to fetch'));
            const localPath = url.match(/^file:/) ?
                Promise.resolve(url) :
                axiosGet(url, 'arraybuffer').then(data => {
                    // Prefer the `?format=` query parameter, fall back to the path extension.
                    const extMatch = /\?format=([a-z]+)&/.exec(url) || /.*\/.*\.([^?]+)/.exec(url);
                    if (!extMatch)
                        throw new Error(`unable to determine file extension of ${url}`);
                    const mediaTempFilePath = temp.path({ suffix: `.${extMatch[1]}` });
                    fs.writeFileSync(mediaTempFilePath, Buffer.from(data));
                    return `file://${mediaTempFilePath}`;
                });
            return localPath.then(path => {
                switch ((/.*\.(.*?)$/.exec(path) || [])[1]) {
                    case 'jpg':
                    case 'png':
                        return koishi_1.Message.Image(path);
                    case 'mp4':
                    case 'ts':
                        return koishi_1.Message.Video(path);
                }
                logger.warn('unable to find MIME type of fetched media, failing this fetch');
                throw new Error(`unsupported media type: ${path}`);
            });
        };
        this.mode = mode;
        if (this.mode) {
            // No browser is needed in the text-only modes.
            if (onready)
                onready();
        }
        else {
            this.wsUrl = wsUrl;
            this.connect(onready);
        }
    }
    /**
     * Builds a message chain for every tweet and hands it to `callback`.
     *
     * Depending on `this.mode`: 0 sends a screenshot plus media, 1 sends text only
     * (media replaced by placeholders), 2 sends text plus media.
     *
     * @param {Array} tweets - objects shaped `{ data, includes: { media, users: [user] } }`
     * @param {Function} callback - called as `(cacheId, messageChain, text, author)` per tweet
     * @param {number} webshotDelay - time budget in ms per webshot; tweets are also
     *   staggered by a quarter of this value
     * @returns {Promise<void>} settles once every tweet has been processed
     */
    webshot(tweets, callback, webshotDelay) {
        const promises = tweets.map(({ data, includes: { media, users: [user] } }, index) => {
            let promise = (0, util_1.promisify)(setTimeout)(webshotDelay / 4 * index).then(() => {
                logger.info(`working on ${user.username}/${data.id}`);
            });
            let messageChain = '';
            let truncatedAt;
            let author = `${user.name} (@${user.username}):\n`;
            author += `${new Date(data.created_at)}\n`;
            let text = data.text;
            const rtTweet = (data.referenced_tweets || []).find(refTweet => refTweet.type === 'retweeted');
            if (rtTweet) {
                // [\s\S]* keeps multi-line retweet bodies intact ('.' stops at a newline).
                const match = /^(RT @.+?: )([\s\S]*)/.exec(text);
                if (match) {
                    author += match[1];
                    text = match[2];
                }
            }
            const urls = data.entities && data.entities.urls || [];
            promise = promise.then(() => {
                // Expand shortened links in place.
                urls.forEach(url => {
                    text = replaceLiteral(text, url.url, url.expanded_url);
                });
                if (media) {
                    media.forEach(entity => {
                        // NOTE(review): this picks the first URL carrying any media_key rather
                        // than the one belonging to `entity` - confirm this is intended.
                        const mediaUrl = urls.find((url) => url.media_key);
                        if (!mediaUrl)
                            return;
                        text = replaceLiteral(text, mediaUrl.expanded_url, this.mode === 1 ? `[${typeInZH[entity.type]}]` : '');
                    });
                }
                if (this.mode > 0)
                    messageChain += (author + xmlEntities.decode(text));
            });
            if (this.mode === 0) {
                const url = `https://mobile.twitter.com/${user.username}/status/${data.id}`;
                promise = promise.then(() => this.renderWebshot(url, 1920, webshotDelay, 
                // Post-processing 1: walk up the thread and scroll to the earliest tweet
                // to keep; returns the id of the tweet at which the thread is cut, if any.
                // The callback runs inside the page and must stay self-contained.
                (_, tweetHandle) => tweetHandle.evaluate(div => {
                    try {
                        const selector = '[data-testid="tweet"] :nth-child(2)>:first-child a';
                        const getProfileUrl = () => (div.querySelector(selector) || { href: '' }).href;
                        const ownerProfileUrl = getProfileUrl();
                        const bottom = div;
                        while (div = div.previousElementSibling) {
                            if (getProfileUrl() !== ownerProfileUrl || div === bottom.previousElementSibling)
                                continue;
                            const top = document.documentElement.scrollTop = window.scrollY + div.getBoundingClientRect().top;
                            if (top > 10)
                                return div.querySelector('article a[aria-label]').href.replace(/.*\/status\//, '');
                        }
                    }
                    catch (_a) { }
                    document.documentElement.scrollTop = 0;
                }).then((id) => {
                    if (!id)
                        return;
                    logger.info(`thread too long, truncating at tweet ${id}...`);
                    truncatedAt = id;
                }), 
                // Post-processing 2: detect media embedded in a card and add it to `media`.
                (page, tweetHandle) => tweetHandle.evaluate(div => {
                    const cardMediaDiv = div.querySelector('div[data-testid^="card.layout"][data-testid$=".media"]');
                    const cardMedia = cardMediaDiv === null || cardMediaDiv === void 0 ? void 0 : cardMediaDiv.querySelector('img, video');
                    if (!cardMedia)
                        return {};
                    let match;
                    if (cardMedia.tagName === 'IMG' && typeof cardMedia.getAttribute('src') === 'string') {
                        match = /^(.*\/card_img\/(\d+)\/.+\?format=.*)&name=/.exec(cardMedia.getAttribute('src'));
                    }
                    if (cardMedia.tagName === 'VIDEO' && typeof cardMedia.getAttribute('poster') === 'string') {
                        match = /^(.*\/amplify_video_thumb\/(\d+)\/img\/.*$)/.exec(cardMedia.getAttribute('poster'));
                        // Unknown poster format: use '.*' as the id so that the stream list
                        // lookup below matches any scraped amplify_video URL.
                        if (!match)
                            match = [, cardMedia.getAttribute('poster'), '.*'];
                    }
                    if (!match)
                        return {};
                    const [url, id] = match.slice(1);
                    return {
                        type: cardMedia.tagName,
                        entityBase: {
                            url,
                            media_key: `${{ IMG: 3, VIDEO: 7 }[cardMedia.tagName]}_${id}`,
                        }
                    };
                }).then(({ type, entityBase }) => {
                    if (!type)
                        return;
                    media || (media = []);
                    if (media.some(entity => entity.media_key === entityBase.media_key))
                        return;
                    if (type === 'IMG')
                        media.push(Object.assign(Object.assign({}, entityBase), { type: 'photo' }));
                    if (type === 'VIDEO') {
                        // Returned so the download finishes (and `media` is updated) before
                        // the media fetching step below starts iterating over `media`.
                        return page.evaluate(id_str => {
                            var _a;
                            return (_a = window['__scrapedVideoUrls']) === null || _a === void 0 ? void 0 : _a.find(videoUrl => new RegExp(`.*/amplify_video/${id_str}/pl/[^/]*\\.m3u8(?:\\?|$)`).exec(videoUrl));
                        }, entityBase.media_key.slice(2)).then(streamlistUrl => {
                            if (!streamlistUrl)
                                throw new Error('no matching stream list was scraped from the page');
                            return axiosGet(streamlistUrl, 'text')
                                .then(utils_1.M3u8.parseStreamlist)
                                // Pick the playlist with the highest bandwidth.
                                .then(playlists => playlists.sort((pl1, pl2) => pl2.bandwidth - pl1.bandwidth)[0])
                                .then(({ bandwidth, playlistPath }) => {
                                    const playlistUrl = new URL(playlistPath, streamlistUrl);
                                    return axiosGet(playlistUrl.href, 'text')
                                        .then(playlist => utils_1.M3u8.parsePlaylist(playlist))
                                        .then(({ duration, segmentPaths, extension: ext }) => {
                                            const mediaTempFilePath = temp.path({ suffix: `.${ext}` });
                                            // Segments are appended one after another into a single file.
                                            return (0, utils_1.chainPromises)(segmentPaths.map(path => () => axiosGet(new URL(path, playlistUrl).href, 'arraybuffer').then(data => {
                                                fs.writeFileSync(mediaTempFilePath, Buffer.from(data), { flag: 'a' });
                                            })))
                                                .then(() => ({
                                                    variants: [{
                                                            bit_rate: bandwidth,
                                                            content_type: { mp4: 'video/mp4', ts: 'video/mp2t' }[ext],
                                                            url: `file://${mediaTempFilePath}`,
                                                        }]
                                                }));
                                        });
                                });
                        }).then(videoInfo => {
                            media.push(Object.assign(Object.assign(Object.assign({}, entityBase), { type: 'video' }), videoInfo));
                        }).catch(error => {
                            logger.error(`error while fetching scraped video, error: ${error}`);
                            logger.warn('unable to fetch scraped video, ignoring...');
                        });
                    }
                })))
                    .then(fileurl => {
                        if (fileurl)
                            return koishi_1.Message.Image(fileurl);
                        return '[截图不可用] ' + author + text;
                    })
                    .then(msg => {
                        if (msg)
                            messageChain += msg;
                    });
            }
            // Modes 0 and 2 attach media; mode 1 does not.
            if (1 - this.mode % 2)
                promise = promise.then(() => {
                    if (media) {
                        return (0, utils_1.chainPromises)(media.map(entity => () => {
                            let url;
                            if (entity.type === 'photo') {
                                url = entity.url.replace(/\.([a-z]+)$/, '?format=$1') + '&name=orig';
                            }
                            else {
                                // Highest bit rate variant first.
                                url = (entity.variants || [])
                                    .filter(variant => variant.bit_rate !== undefined)
                                    .sort((var1, var2) => var2.bit_rate - var1.bit_rate)
                                    .map(variant => variant.url)[0];
                            }
                            const altMessage = `\n[失败的${typeInZH[entity.type]}:${url}]`;
                            return this.fetchMedia(url)
                                .catch(error => {
                                    logger.warn('unable to fetch media, sending plain text instead...');
                                    return altMessage;
                                })
                                .then(msg => {
                                    messageChain += msg;
                                });
                        }));
                    }
                });
            promise = promise.then(() => {
                if (truncatedAt) {
                    messageChain += `\n回复此命令查看对话串中更早的推文:\n/twi_view ${truncatedAt}`;
                }
            });
            const quotedTweet = (data.referenced_tweets || []).find(refTweet => refTweet.type === 'quoted');
            if (quotedTweet) {
                promise = promise.then(() => {
                    messageChain += `\n回复此命令查看引用的推文:\n/twi_view ${quotedTweet.id}`;
                });
            }
            return promise.then(() => {
                logger.info(`done working on ${user.username}/${data.id}, message chain:`);
                logger.info(JSON.stringify(messageChain));
                let cacheId = data.id;
                if (rtTweet)
                    cacheId += `,rt:${rtTweet.id}`;
                callback(cacheId, messageChain, xmlEntities.decode(text), author);
            });
        });
        return Promise.all(promises).then();
    }
}
exports.default = Webshot;