"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); const fs_1 = require("fs"); const util_1 = require("util"); const axios_1 = require("axios"); const CallableInstance = require("callable-instance"); const html_entities_1 = require("html-entities"); const pngjs_1 = require("pngjs"); const puppeteer = require("playwright"); const sharp = require("sharp"); const temp = require("temp"); const loggers_1 = require("./loggers"); const koishi_1 = require("./koishi"); const utils_1 = require("./utils"); const xmlEntities = new html_entities_1.XmlEntities(); const ZHType = (type) => new class extends String { constructor() { super(...arguments); this.type = super.toString(); this.toString = () => `[${super.toString()}]`; } }(type); const typeInZH = { photo: ZHType('图片'), video: ZHType('视频'), animated_gif: ZHType('GIF'), }; const logger = loggers_1.getLogger('webshot'); class Webshot extends CallableInstance { constructor(wsUrl, mode, onready) { super('webshot'); this.connect = (onready) => axios_1.default.get(this.wsUrl) .then(res => { logger.info(`received websocket endpoint: ${JSON.stringify(res.data)}`); const browserType = Object.keys(res.data)[0]; return puppeteer[browserType] .connect({ wsEndpoint: res.data[browserType] }); }) .then(browser => this.browser = browser) .then(() => { logger.info('launched puppeteer browser'); if (onready) return onready(); }) .catch(error => this.reconnect(error, onready)); this.reconnect = (error, onready) => { logger.error(`connection error, reason: ${error}`); logger.warn('trying to reconnect in 2.5s...'); return util_1.promisify(setTimeout)(2500) .then(() => this.connect(onready)); }; this.extendEntity = (media) => { logger.info('not working on a tweet'); }; this.truncateLongThread = (atId) => { logger.info('not working on a tweet'); }; this.renderWebshot = (url, height, webshotDelay, ...morePostProcessings) => { temp.track(); const jpeg = (data) => data.pipe(sharp()).jpeg({ quality: 90, trellisQuantisation: true }); const sharpToFile = (pic) => new Promise(resolve => { const webshotTempFilePath = temp.path({ suffix: '.jpg' }); pic.toFile(webshotTempFilePath).then(() => resolve(`file://${webshotTempFilePath}`)); }); const promise = new Promise((resolve, reject) => { const width = 720; const zoomFactor = 2; logger.info(`shooting ${width}*${height} webshot for ${url}`); this.browser.newPage({ bypassCSP: true, deviceScaleFactor: zoomFactor, locale: 'ja-JP', timezoneId: 'Asia/Tokyo', userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36', }) .then(page => { const startTime = new Date().getTime(); const getTimerTime = () => new Date().getTime() - startTime; const getTimeout = () => Math.max(500, webshotDelay - getTimerTime()); page.setViewportSize({ width: width / zoomFactor, height: height / zoomFactor, }) .then(() => page.route('*:\/\/video.twimg.com\/**', route => { route.abort(); })) .then(() => page.goto(url, { waitUntil: 'load', timeout: getTimeout() })) .then(() => Promise.race([ page.waitForSelector('article', { state: 'attached', timeout: getTimeout() }), page.click('#placeholder+#ScriptLoadFailure input[value="Try again"]', { timeout: getTimeout() }), page.waitForSelector('div[role="button"]>div>span>:text-matches("^やりなおす|更新$")', { state: 'attached', timeout: getTimeout() }) .then(() => page.reload({ timeout: getTimeout() })), ])) .then(() => page.addStyleTag({ content: 'header,#layers{display:none!important}article{background-color:transparent!important}' + '[data-testid="caret"],[role="group"],[data-testid="tweet"] [class*=" "]+:last-child>*+[class*=" "]~div{display:none}', })) .then(() => page.addStyleTag({ content: '*{font-family:-apple-system,".Helvetica Neue DeskInterface",Hiragino Sans,Hiragino Sans GB,sans-serif!important}', })) .then(() => page.evaluate(() => { const poll = setInterval(() => { document.querySelectorAll('div[data-testid="placementTracking"]').forEach(container => { if (container.querySelector('div[role="button"] svg')) { container.innerHTML = container.innerHTML; clearInterval(poll); } }); }, 250); })) .then(() => page.waitForSelector('xpath=//section/*/*/div[.//article[not(.//time[not(ancestor::div[@aria-labelledby])])]]', { state: 'attached', timeout: getTimeout() })) .then(handle => handle.$$('xpath=..//a[contains(@href,"content_you_see")]/../../..//*[@role="button"]') .then(sensitiveToggles => { const count = sensitiveToggles.length; if (count) logger.info(`found ${count} sensitive ${count === 1 ? 'tweet' : 'tweets'} on page, uncollapsing...`); return utils_1.chainPromises(sensitiveToggles.filter(toggle => toggle.isVisible()).map(toggle => () => toggle.click())); }) .then(() => handle)) .then(handle => handle.$('[data-testid="tweet"]').then(owner => owner ? handle : null)) .catch((err) => { if (err.name !== 'TimeoutError') throw err; logger.warn(`${err} (${getTimerTime()} ms)`); return page.evaluate(() => document.documentElement.outerHTML).then(html => { const path = temp.path({ suffix: '.html' }); fs_1.writeFileSync(path, html); logger.warn(`saved debug html to ${path}`); }).then(() => page.screenshot()).then(screenshot => { sharpToFile(sharp(screenshot).jpeg({ quality: 90 })).then(fileUri => { logger.warn(`saved debug screenshot to ${fileUri.substring(7)}`); }); }).then(() => null); }) .then((handle) => { if (handle === null) throw new puppeteer.errors.TimeoutError(); return handle.evaluate(div => { try { const selector = '[data-testid="tweet"] :nth-child(2)>:first-child a'; const getProfileUrl = () => (div.querySelector(selector) || { href: '' }).href; const ownerProfileUrl = getProfileUrl(); const bottom = div; while (div = div.previousElementSibling) { if (getProfileUrl() !== ownerProfileUrl || div === bottom.previousElementSibling) continue; const top = document.documentElement.scrollTop = window.scrollY + div.getBoundingClientRect().top; if (top > 10) return div.querySelector('article a[aria-label]').href.replace(/.*\/status\//, ''); } } catch (_a) { } document.documentElement.scrollTop = 0; }).then(this.truncateLongThread).then(() => handle); }) .then(handle => handle.evaluate(div => { const cardImg = div.querySelector('div[data-testid^="card.layout"][data-testid$=".media"] img'); if (typeof (cardImg === null || cardImg === void 0 ? void 0 : cardImg.getAttribute('src')) === 'string') { const match = /^(.*\/card_img\/(\d+)\/.+\?format=.*)&name=/.exec(cardImg === null || cardImg === void 0 ? void 0 : cardImg.getAttribute('src')); if (match) { const [media_url_https, id_str] = match.slice(1); return { media_url: media_url_https.replace(/^https/, 'http'), media_url_https, url: '', display_url: '', expanded_url: '', type: 'photo', id: Number(id_str), id_str, sizes: undefined, }; } } })) .then(cardImg => { if (cardImg) this.extendEntity(cardImg); }) .then(() => utils_1.chainPromises(morePostProcessings.map(func => () => func(page)))) .then(() => util_1.promisify(setTimeout)(getTimeout())) .then(() => page.evaluate(() => document.activeElement.blur())) .then(() => page.screenshot()) .then(screenshot => { new pngjs_1.PNG({ filterType: 4, deflateLevel: 0, }).on('parsed', function () { const idx = (x, y) => (this.width * y + x) << 2; let boundary = null; const x = zoomFactor * 2; for (let y = x; y < this.height; y += zoomFactor) { if (this.data[idx(x, y)] !== this.data[idx(x, y - zoomFactor)] && this.data[idx(x, y)] === this.data[idx(x + zoomFactor * 10, y)]) { boundary = y; break; } } if (boundary !== null) { logger.info(`found boundary at ${boundary}, cropping image`); this.data = this.data.slice(0, idx(this.width, boundary)); this.height = boundary; sharpToFile(jpeg(this.pack())).then(path => { logger.info(`finished webshot for ${url}`); resolve({ path, boundary }); }); } else if (height >= 8 * 1920) { logger.warn('too large, consider as a bug, returning'); sharpToFile(jpeg(this.pack())).then(path => { resolve({ path, boundary: 0 }); }); } else { logger.info('unable to find boundary, try shooting a larger image'); resolve({ path: '', boundary }); } }).parse(screenshot); }) .catch(err => { if (err instanceof Error && err.name !== 'TimeoutError') throw err; logger.error(`error shooting webshot for ${url}, could not load web page of tweet`); resolve({ path: '', boundary: 0 }); }) .finally(() => { page.close(); }); }) .catch(reject); }); return promise.then(data => { if (data.boundary === null) { return this.renderWebshot(url, height + 1920, webshotDelay, ...morePostProcessings); } else return data.path; }).catch(error => this.reconnect(error) .then(() => this.renderWebshot(url, height, webshotDelay, ...morePostProcessings))); }; this.fetchMedia = (url) => new Promise((resolve, reject) => { logger.info(`fetching ${url}`); axios_1.default({ method: 'get', url, responseType: 'arraybuffer', timeout: 150000, }).then(res => { if (res.status === 200) { logger.info(`successfully fetched ${url}`); resolve(res.data); } else { logger.error(`failed to fetch ${url}: ${res.status}`); reject(); } }).catch(err => { logger.error(`failed to fetch ${url}: ${err instanceof Error ? err.message : err}`); reject(); }); }).then(data => { var _a; return (ext => { const mediaTempFilePath = temp.path({ suffix: `.${ext}` }); fs_1.writeFileSync(mediaTempFilePath, Buffer.from(data)); const path = `file://${mediaTempFilePath}`; switch (ext) { case 'jpg': case 'png': return koishi_1.Message.Image(path); case 'mp4': return koishi_1.Message.Video(path); } logger.warn('unable to find MIME type of fetched media, failing this fetch'); throw Error(); })(((_a = (/\?format=([a-z]+)&/.exec(url))) !== null && _a !== void 0 ? _a : (/.*\/.*\.([^?]+)/.exec(url)))[1]); }); if (this.mode = mode) { onready(); } else { this.wsUrl = wsUrl; this.connect(onready); } } webshot(tweets, callback, webshotDelay) { let promise = new Promise(resolve => { resolve(); }); tweets.forEach(twi => { promise = promise.then(() => { logger.info(`working on ${twi.user.screen_name}/${twi.id_str}`); }); const originTwi = twi.retweeted_status || twi; let messageChain = ''; let truncatedAt; let author = `${twi.user.name} (@${twi.user.screen_name}):\n`; if (twi.retweeted_status) author += `RT @${twi.retweeted_status.user.screen_name}: `; let text = originTwi.full_text; promise = promise.then(() => { if (originTwi.entities && originTwi.entities.urls && originTwi.entities.urls.length) { originTwi.entities.urls.forEach(url => { text = text.replace(new RegExp(url.url, 'gm'), url.expanded_url); }); } if (originTwi.extended_entities) { originTwi.extended_entities.media.forEach(media => { text = text.replace(new RegExp(media.url, 'gm'), this.mode === 1 ? typeInZH[media.type] : ''); }); } if (this.mode > 0) messageChain += (author + xmlEntities.decode(text)); }); if (this.mode === 0) { const url = `https://mobile.twitter.com/${twi.user.screen_name}/status/${twi.id_str}`; this.extendEntity = (cardImg) => { var _a, _b; originTwi.extended_entities = Object.assign(Object.assign({}, originTwi.extended_entities), { media: [ ...(_b = (_a = originTwi.extended_entities) === null || _a === void 0 ? void 0 : _a.media) !== null && _b !== void 0 ? _b : [], cardImg, ] }); }; this.truncateLongThread = (atId) => { if (!atId) return; logger.info(`thread too long, truncating at tweet ${atId}...`); truncatedAt = atId; }; promise = promise.then(() => this.renderWebshot(url, 1920, webshotDelay)) .then(fileurl => { if (fileurl) return koishi_1.Message.Image(fileurl); return '[截图不可用] ' + author + text; }) .then(msg => { if (msg) messageChain += msg; }); } if (1 - this.mode % 2) promise = promise.then(() => { if (originTwi.extended_entities) { return utils_1.chainPromises(originTwi.extended_entities.media.map(media => () => { let url; if (media.type === 'photo') { url = media.media_url_https.replace(/\.([a-z]+)$/, '?format=$1') + '&name=orig'; } else { url = media.video_info.variants .filter(variant => variant.bitrate !== undefined) .sort((var1, var2) => var2.bitrate - var1.bitrate) .map(variant => variant.url)[0]; } const altMessage = `\n[失败的${typeInZH[media.type].type}:${url}]`; return this.fetchMedia(url) .catch(error => { logger.warn('unable to fetch media, sending plain text instead...'); return altMessage; }) .then(msg => { messageChain += msg; }); })); } }); if (this.mode === 0) { if (originTwi.entities && originTwi.entities.urls && originTwi.entities.urls.length) { promise = promise.then(() => { const urls = originTwi.entities.urls .filter(urlObj => urlObj.indices[0] < originTwi.display_text_range[1]) .map(urlObj => `\n\ud83d\udd17 ${urlObj.expanded_url}`); if (urls.length) { messageChain += urls.join(''); } }); } } promise = promise.then(() => { if (truncatedAt) { messageChain += `\n回复此命令查看对话串中更早的推文:\n/twitter_view ${truncatedAt}`; } }); if (originTwi.is_quote_status) { promise = promise.then(() => { var _a, _b; const match = /\/status\/(\d+)/.exec((_a = originTwi.quoted_status_permalink) === null || _a === void 0 ? void 0 : _a.expanded); const blockQuoteIdStr = match ? match[1] : (_b = originTwi.quoted_status) === null || _b === void 0 ? void 0 : _b.id_str; if (blockQuoteIdStr) messageChain += `\n回复此命令查看引用的推文:\n/twitter_view ${blockQuoteIdStr}`; }); } promise.then(() => { logger.info(`done working on ${twi.user.screen_name}/${twi.id_str}, message chain:`); logger.info(JSON.stringify(koishi_1.Message.ellipseBase64(messageChain))); const twiId = twi.retweeted_status ? twi.retweeted_status.id_str : twi.id_str; callback(twiId, messageChain, xmlEntities.decode(text), author); }); }); return promise; } } exports.default = Webshot;