import * as crypto from 'crypto';
import * as fs from 'fs';
import * as http from 'http';
import * as path from 'path';
import { parse as parseUrl } from 'url';
import { promisify } from 'util';

import {
  instagramIdToUrlSegment as idToUrlSegment,
  urlSegmentToInstagramId as pubUrlSegmentToId
} from 'instagram-id-to-url-segment';
import {
  IgApiClient,
  IgClientError, IgCookieNotFoundError, IgExactUserNotFoundError, IgLoginRequiredError,
  IgLoginTwoFactorRequiredError, IgResponseError,
  MediaInfoResponseItemsItem, UserFeedResponseItemsItem
} from 'instagram-private-api';
import { SocksProxyAgent } from 'socks-proxy-agent';

import { getLogger } from './loggers';
import QQBot from './koishi';
import { Arr, BigNumOps, chainPromises, customError, neverResolves } from './utils';
import Webshot, { Cookies, Page } from './webshot';

// NOTE(review): the generic type arguments in this module (Promise<...>, ReturnType<...>,
// <T, U>) were reconstructed from how each value is used within this file - confirm them
// against the declarations of the imported project modules.

/**
 * Extracts either a post URL segment (`instagram.com/p/<segment>`) or a normalized user
 * name from a link or a bare user name. Returns `undefined` when nothing matches.
 */
const parseLink = (link: string): { userName?: string, postUrlSegment?: string } => {
  let match = /instagram\.com\/p\/([A-Za-z0-9\-_]+)/.exec(link);
  if (match) return { postUrlSegment: match[1] };
  match =
    /instagram\.com\/([^\/?#]+)/.exec(link) ||
    /^([^\/?#]+)$/.exec(link);
  // normalize() appends a trailing ':', which is dropped again here
  if (match) return { userName: ScreenNameNormalizer.normalize(match[1]).split(':')[0] };
  return;
};

/**
 * Builds the profile URL (when `userName` is set) or the post URL (when `postUrlSegment`
 * is set) for a parsed link; yields `undefined` when neither is set.
 */
const linkBuilder = (config: ReturnType<typeof parseLink>): string => {
  if (config.userName) return `https://www.instagram.com/${config.userName}/`;
  if (config.postUrlSegment) return `https://www.instagram.com/p/${config.postUrlSegment}/`;
};

/**
 * Builds the GraphQL query URL used to page through a user's timeline.
 * The base path and the query string are concatenated so that no whitespace can end up
 * inside the URL.
 */
const graphqlLinkBuilder = ({userId, first = '12', after}: {userId: string, first?: string, after?: string}) =>
  'https://www.instagram.com/graphql/query/' +
  `?query_id=17888483320059182&id=${userId}&first=${first}${after ? `&after=${after}` : ''}`;

/**
 * Converts a post URL segment to a media ID. Segments longer than 28 characters have
 * their last 28 characters removed before conversion.
 */
const urlSegmentToId = (urlSegment: string) => urlSegment.length <= 28 ?
  pubUrlSegmentToId(urlSegment) : pubUrlSegmentToId(urlSegment.slice(0, -28));

export { graphqlLinkBuilder, linkBuilder, parseLink, idToUrlSegment, urlSegmentToId };

interface IWorkerOption {
  sessionLockfile: string;
  credentials: [string, string];
  codeServicePort: number;
  proxyUrl: string;
  lock: ILock;
  lockfile: string;
  webshotCookiesLockfile: string;
  bot: QQBot;
  inactiveHours: string[];
  workInterval: number;
  webshotDelay: number;
  mode: number;
  wsUrl: string;
}

/**
 * Handles logging in with the private API client, persisting the serialized client state
 * to a file, and collecting two-factor codes through a temporary HTTP endpoint.
 */
export class SessionManager {
  private ig: IgApiClient;
  private username: string;
  private password: string;
  private lockfile: string;
  private codeServicePort: number;

  constructor(client: IgApiClient, file: string, credentials: [string, string], codeServicePort: number) {
    this.ig = client;
    this.lockfile = file;
    [this.username, this.password] = credentials;
    this.codeServicePort = codeServicePort;
  }

  /**
   * Generates the device for this user, saves the client state after every request, then
   * either restores the state from the session file or logs in. A failed login removes
   * the session file (if present) and schedules another `init` after one minute.
   */
  public init = () => {
    this.ig.state.generateDevice(this.username);
    this.ig.request.end$.subscribe(() => { this.save(); });
    const filePath = path.resolve(this.lockfile);
    if (fs.existsSync(filePath)) {
      try {
        const serialized = JSON.parse(fs.readFileSync(filePath, 'utf8')) as { [key: string]: any };
        return this.ig.state.deserialize(serialized).then(() => {
          logger.info(`successfully loaded client session cookies for user ${this.username}`);
        });
      } catch (err) {
        logger.error(`failed to load client session cookies from file ${this.lockfile}: `, err);
        return Promise.resolve();
      }
    } else {
      return this.login().catch((err: IgClientError) => {
        logger.error(`error while trying to log in as user ${this.username}, error: ${err}`);
        logger.warn('attempting to retry after 1 minute...');
        if (fs.existsSync(filePath)) fs.unlinkSync(filePath);
        promisify(setTimeout)(60000).then(this.init);
      });
    }
  };

  /**
   * Starts an HTTP server on `codeServicePort` and waits for a request to
   * `/confirm-2fa?code=<6 digits>&token=<one-time token>`; the code is passed to
   * `submitter`. Resolves with the submitter's result, rejects with its error (or with a
   * server error such as a failure to listen).
   */
  public handle2FA = <T>(submitter: (code: string) => Promise<T>) => new Promise<T>((resolve, reject) => {
    const token = crypto.randomBytes(20).toString('hex');
    logger.info('please submit the code with a one-time token from your browser with this path:');
    logger.info(`/confirm-2fa?code=&token=${token}`);
    let working = false;
    const server = http.createServer((req, res) => {
      const {pathname, query} = parseUrl(req.url, true);
      if (!working && pathname === '/confirm-2fa' && query.token === token &&
        typeof(query.code) === 'string' && /^\d{6}$/.test(query.code)) {
        const code = query.code;
        logger.debug(`received code: ${code}`);
        working = true;
        submitter(code)
          .then(response => { res.write('OK'); res.end(); server.close(() => resolve(response)); })
          .catch(err => {
            res.write('Error');
            res.end();
            // stop listening so that a later attempt can bind the same port again
            server.close();
            reject(err);
          })
          .finally(() => { working = false; });
      } else {
        // answer every other request (e.g. a browser asking for /favicon.ico) right away:
        // an unanswered socket would stay open and hold up server.close()
        res.statusCode = 400;
        res.setHeader('Connection', 'close');
        res.end();
      }
    });
    // e.g. the port is already in use
    server.on('error', reject);
    server.listen(this.codeServicePort);
  });

  /**
   * Runs the pre-login flow and logs in; when the API asks for two-factor authentication
   * the code is collected via `handle2FA`. The post-login flow is started on the next
   * tick and the returned promise resolves with the logged-in user once it completes.
   */
  public login = () =>
    this.ig.simulate.preLoginFlow()
      .then(() => this.ig.account.login(this.username, this.password))
      .catch((err: IgClientError) => {
        if (err instanceof IgLoginTwoFactorRequiredError) {
          const {two_factor_identifier, totp_two_factor_on} = err.response.body.two_factor_info;
          logger.debug(`2FA info: ${JSON.stringify(err.response.body.two_factor_info)}`);
          logger.info(`login is requesting two-factor authentication via ${totp_two_factor_on ? 'TOTP' : 'SMS'}`);
          return this.handle2FA(code => this.ig.account.twoFactorLogin({
            username: this.username,
            verificationCode: code,
            twoFactorIdentifier: two_factor_identifier,
            verificationMethod: totp_two_factor_on ? '0' : '1',
          }));
        }
        throw err;
      })
      .then(user => new Promise<typeof user>(resolve => {
        logger.info(`successfully logged in as ${this.username}`);
        process.nextTick(() => resolve(this.ig.simulate.postLoginFlow().then(() => user)));
      }));

  /** Serializes the client state (without its `constants` entry) into the session file. */
  public save = () =>
    this.ig.state.serialize()
      .then((serialized: { [key: string]: any }) => {
        delete serialized.constants;
        return fs.writeFileSync(path.resolve(this.lockfile), JSON.stringify(serialized, null, 2), 'utf-8');
      });
}

/** Normalizes screen names to the `<name>:` / `<name>:<pk>` form used by this module. */
export class ScreenNameNormalizer {

  // assigned by the worker's constructor (see the default export)
  // tslint:disable-next-line: variable-name
  public static _queryUser: (username: string) => Promise<string>;

  /** Lower-cases the name, strips one leading `@` and appends `:`. */
  public static normalize = (username: string) => `${username.toLowerCase().replace(/^@/, '')}:`;

  /**
   * Looks the user up through `_queryUser` when it is available. Resolves with `null`
   * when the exact user was not found, and with `<username>:` on any other lookup error.
   * Falls back to `normalize` when `_queryUser` has not been assigned.
   */
  public static async normalizeLive(username: string) {
    if (this._queryUser) {
      return await this._queryUser(username)
        .catch((err: IgClientError) => {
          if (!(err instanceof IgExactUserNotFoundError)) {
            logger.warn(`error looking up user: ${err.message}`);
            return `${username}:`;
          }
          return null;
        });
    }
    return this.normalize(username);
  }
}

// placeholders, replaced by the worker's constructor
let browserLogin = (page: Page): Promise<void> => Promise.resolve();
let browserSaveCookies = browserLogin;

let isWaitingForLogin = false;

/** Clicks the cookie consent button if it shows up within 5 seconds; a timeout is ignored. */
const acceptCookieConsent = (page: Page) =>
  page.click('button:text-matches("すべて.*許可")', { timeout: 5000 })
    .then(() => logger.info('accepted cookie consent'))
    .catch((err: Error) => { if (err.name !== 'TimeoutError') throw err; });

export const WebshotHelpers = {
  handleLogin: browserLogin,
  handleCookieConsent: acceptCookieConsent,
  get isWaitingForLogin() { return isWaitingForLogin; },
};

// placeholders, replaced by the worker's constructor
export let getPostOwner = (segmentId: string): Promise<string> => Promise.reject();
export let sendPost = (segmentId: string, receiver: IChat): void => { throw Error(); };

type IgGraphQLTimelineMediaNode = {
  id: string,
  display_url: string,
  owner: {
    id: string,
    username?: string,
  },
} & (
  {__typename: 'GraphImage'} |
  {__typename: 'GraphSidecar', edge_sidecar_to_children: {
    edges: {node: (IgGraphQLTimelineMediaNode & {__typename: 'GraphImage'})}[],
  }} |
  {__typename: 'GraphVideo', video_url: string, product_type?: 'igtv'}
);

export type IgGraphQLUser = {
  biography?: string,
  fbid: string,
  full_name: string,
  id: string,
  username: string,
  edge_owner_to_timeline_media: {
    count: number,
    page_info: {
      has_next_page: boolean,
      end_cursor: string | null,
    },
    edges: {node: IgGraphQLTimelineMediaNode}[],
  },
};

export type MediaItem = MediaInfoResponseItemsItem & UserFeedResponseItemsItem;

export type LazyMediaItem = {
  pk: string,
  item: () => Promise<MediaItem>,
};

const logger = getLogger('instagram');
const maxTrials = 3;
const retryInterval = 1500;

/** Formats `n` with its English ordinal suffix (1st, 2nd, 3rd, 11th, ...). */
const ordinal = (n: number) => {
  switch ((Math.trunc(n / 10) % 10 === 1) ? 0 : n % 10) {
    case 1:
      return `${n}st`;
    case 2:
      return `${n}nd`;
    case 3:
      return `${n}rd`;
    default:
      return `${n}th`;
  }
};

/**
 * Runs `doWork` and repeats it after `retryInterval` ms each time it rejects. `onRetry`
 * is called before every repetition with the error and the retry count; calling its
 * `terminate` argument stops retrying and resolves with the given default value.
 */
const retryOnError = <T, U>(
  doWork: () => Promise<T>,
  onRetry: (error, count: number, terminate: (defaultValue: U) => void) => void
) => new Promise<T | U>(resolve => {
  const retry = (reason, count: number) => {
    setTimeout(() => {
      let terminate = false;
      onRetry(reason, count, defaultValue => { terminate = true; resolve(defaultValue); });
      if (!terminate) doWork().then(resolve).catch(error => retry(error, count + 1));
    }, retryInterval);
  };
  doWork().then(resolve).catch(error => retry(error, 1));
});

/**
 * Worker that polls the subscribed feeds through the browser, resolves the media through
 * the private API client and forwards the result to the subscribers via the bot.
 */
export default class {

  private client: IgApiClient;
  private lock: ILock;
  private lockfile: string;
  private inactiveHours: string[];
  private workInterval: number;
  private bot: QQBot;
  private webshotDelay: number;
  private webshotCookies: Cookies = [];
  private webshotCookiesLockfile: string;
  private webshot: Webshot;
  private mode: number;
  private wsUrl: string;

  public session: SessionManager;

  constructor(opt: IWorkerOption) {
    this.client = new IgApiClient();
    if (opt.proxyUrl) {
      try {
        const url = new URL(opt.proxyUrl);
        if (!/^socks(?:4a?|5h?)?:$/.test(url.protocol)) throw Error();
        if (!url.port) url.port = '1080';
        this.client.request.defaults.agent = new SocksProxyAgent({
          hostname: url.hostname,
          port: url.port,
          userId: url.username,
          password: url.password,
        });
      } catch (e) {
        logger.warn(`invalid socks proxy url: ${opt.proxyUrl}, ignoring`);
      }
    }
    this.session = new SessionManager(this.client, opt.sessionLockfile, opt.credentials, opt.codeServicePort);
    this.lockfile = opt.lockfile;
    this.webshotCookiesLockfile = opt.webshotCookiesLockfile;
    this.lock = opt.lock;
    this.inactiveHours = opt.inactiveHours;
    this.workInterval = opt.workInterval;
    this.bot = opt.bot;
    this.webshotDelay = opt.webshotDelay;
    this.mode = opt.mode;
    this.wsUrl = opt.wsUrl;

    const cookiesFilePath = path.resolve(this.webshotCookiesLockfile);

    try {
      this.webshotCookies = JSON.parse(fs.readFileSync(cookiesFilePath, 'utf8')) as Cookies;
      logger.info(`loaded webshot cookies from file ${this.webshotCookiesLockfile}`);
    } catch (err) {
      logger.warn(
        `failed to load webshot cookies from file ${this.webshotCookiesLockfile}: `,
        (err as Error).message
      );
      logger.warn('cookies will be saved to this file when needed');
    }

    // Fills in the login form once it appears. While another login is already pending,
    // this call is parked on a promise that never resolves.
    browserLogin = page =>
      page.fill('input[name="username"]', opt.credentials[0], {timeout: 0})
        .then(() => {
          if (isWaitingForLogin !== true) return;
          logger.warn('still waiting for login, pausing execution...');
          return neverResolves();
        })
        .then(() => {
          isWaitingForLogin = true;
          logger.warn('blocked by login dialog, trying to log in manually...');
        })
        .then(() => page.fill('input[name="password"]', opt.credentials[1], {timeout: 0}))
        .then(() => page.click('button[type="submit"]', {timeout: 0}))
        .then(() =>
          // whichever comes first: the 2FA prompt (then `next`), or `next` directly
          (next => Promise.race([
            page.waitForSelector('#verificationCodeDescription', {timeout: 0})
              .then(handle => handle.innerText())
              .then(text => {
                logger.info(`login is requesting two-factor authentication via ${/認証アプリ/.test(text) ? 'TOTP' : 'SMS'}`);
                return this.session.handle2FA(code => page.fill('input[name="verificationCode"]', code, {timeout: 0}))
                  .then(() => page.click('button:has-text("実行")', {timeout: 0}))
                  .then(next);
              }),
            next(),
          ]))(() => page.click('button:has-text("情報を保存")', {timeout: 0}).then(() => { isWaitingForLogin = false; }))
        );

    browserSaveCookies = page =>
      page.context().cookies()
        .then(cookies => {
          this.webshotCookies = cookies;
          logger.info('successfully logged in, saving cookies to file...');
          fs.writeFileSync(path.resolve(this.webshotCookiesLockfile), JSON.stringify(cookies, null, 2), 'utf-8');
        });

    WebshotHelpers.handleLogin = page =>
      browserLogin(page)
        .then(() => page.waitForSelector('img[data-testid="user-avatar"]', { timeout: this.webshotDelay }))
        .then(() => browserSaveCookies(page))
        .catch((err: Error) => {
          if (err.name === 'TimeoutError') {
            logger.warn('navigation timed out, assuming login has failed');
            isWaitingForLogin = false;
          }
          throw err;
        });

    ScreenNameNormalizer._queryUser = this.queryUser;

    // maps an API error to the message sent to the chat
    const parseMediaError = (err: IgClientError) => {
      if (!(err instanceof IgResponseError && err.text === 'Media not found or unavailable')) {
        logger.warn(`error retrieving instagram media: ${err.message}`);
        return `获取媒体时出现错误:${err.message}`;
      }
      return '找不到请求的媒体,它可能已被删除。';
    };

    getPostOwner = (segmentId) =>
      this.client.media.info(urlSegmentToId(segmentId))
        .then(media => media.items[0].user)
        .then(user => `${user.username}:${user.pk}`)
        .catch((err: IgClientError) => { throw Error(parseMediaError(err)); });

    sendPost = (segmentId, receiver) => {
      const lazyMedia = this.lazyGetMediaById(urlSegmentToId(segmentId));
      return lazyMedia.item().then(mediaItem => {
        const lock = this.lock;
        const feed = linkBuilder({userName: mediaItem.user.username});
        // IDs are decimal strings: compare them numerically, as done when pulling feeds,
        // instead of lexicographically with `<`
        if (lock.feed.includes(feed) && BigNumOps.compare(mediaItem.pk, lock.threads[feed].offset) > 0) {
          logger.info(`post is newer than last offset of thread (${idToUrlSegment(lock.threads[feed].offset)}), updating...`);
          this.workOnFeed(feed);
          if (lock.threads[feed].subscribers.some(subscriber =>
            subscriber.chatID.toString() === receiver.chatID.toString() &&
            subscriber.chatType === receiver.chatType
          )) return logger.info(`receiver has already subscribed to feed ${feed}, not sending again`);
        }
        // reuse the item that has just been fetched instead of querying it again
        lazyMedia.item = () => Promise.resolve(mediaItem);
        this.workOnMedia([lazyMedia], this.sendMedia(`instagram media ${segmentId}`, receiver));
      }).catch((err: IgClientError) => {
        this.bot.sendTo(receiver, parseMediaError(err));
        if (err instanceof IgLoginRequiredError || err instanceof IgCookieNotFoundError) {
          logger.warn('login required, awaiting login...');
          this.bot.sendTo(receiver, '等待登陆中,稍后会处理请求,请稍候……');
          return this.session.login().then(() => sendPost(segmentId, receiver));
        }
      });
    };
  }

  /**
   * Creates the webshot instance. Once it hands over `doOnNewPage`, `queryUserMedia` is
   * installed and the first `work` run is scheduled.
   */
  public launch = () => {
    this.webshot = new Webshot(
      this.wsUrl,
      this.mode,
      () => this.webshotCookies,
      doOnNewPage => {
        this.queryUserMedia = ((userName, targetId) => {
          let page: Page;
          let url = linkBuilder({userName}) + '?__a=1';
          logger.debug(`pulling ${targetId !== '0' ? `feed ${url} up to ${targetId}` : `top of feed ${url}`}...`);
          return doOnNewPage(newPage => {
            page = newPage;
            let timeout = this.webshotDelay / 2;
            const startTime = new Date().getTime();
            const getTimerTime = () => new Date().getTime() - startTime;
            // 0 is passed as the timeout while a login is pending; otherwise what is
            // left of the time budget, but at least 5 seconds
            const getTimeout = () => isWaitingForLogin ? 0 : Math.max(5000, timeout - getTimerTime());
            return page.context().addCookies(this.webshotCookies)
              .then(() => page.goto(url, {waitUntil: 'load', timeout: getTimeout()}))
              .then(response => {
                const itemIds: string[] = [];
                // used when the response is not JSON: log in, save cookies and retry
                const redirectionHandler = () =>
                  acceptCookieConsent(page)
                    .then(() => browserLogin(page))
                    .catch((err: Error) => {
                      if (err.name === 'TimeoutError') {
                        logger.warn('navigation timed out, assuming login has failed');
                        isWaitingForLogin = false;
                      }
                      throw err;
                    })
                    .then(() => browserSaveCookies(page))
                    .then(() => page.goto(url, {waitUntil: 'load', timeout: getTimeout()}))
                    .then(responseHandler);
                const responseHandler = (res: typeof response): ReturnType<typeof response.json> => {
                  if (res.status() !== 200) {
                    throw customError('ResponseError')(
                      `error navigating to user page, error was: ${res.status()} ${res.statusText()}`
                    );
                  }
                  return res.json()
                    .catch(redirectionHandler)
                    .then((json: {[key: string]: {user: IgGraphQLUser}}) => {
                      if (!json || !(json.graphql || json.data)?.user) {
                        logger.warn('error parsing graphql response, returning empty object...');
                        const data = {user: {edge_owner_to_timeline_media: {edges: []}} as IgGraphQLUser};
                        return {graphql: data, data};
                      }
                      return json;
                    });
                };
                const jsonHandler = ({user}: {user: IgGraphQLUser}): string[] | Promise<string[]> => {
                  const pageInfo = user.edge_owner_to_timeline_media.page_info;
                  for (const {node} of user.edge_owner_to_timeline_media.edges) {
                    // exclude IGTV
                    if (node.__typename === 'GraphVideo' && node.product_type === 'igtv') continue;
                    // add post if ID is greater than target
                    if (node.id && BigNumOps.compare(node.id, targetId) > 0) itemIds.push(node.id);
                    // return if ID is equal to or smaller than target
                    else return itemIds;
                    // return after first addition if newly subscribed or restarted with resuming disabled
                    if (Number(targetId) < 1) return itemIds;
                  }
                  // return if all IDs are greater than target but end of feed is reached
                  if (!pageInfo?.has_next_page) return itemIds;
                  // else, fetch next page using end_cursor
                  logger.info('unable to find a smaller id than target, trying on next page...');
                  url = graphqlLinkBuilder({userId: user.id, after: pageInfo.end_cursor});
                  const nextPageDelay = this.webshotDelay * (0.4 + Math.random() * 0.1);
                  timeout += nextPageDelay;
                  return promisify(setTimeout)(nextPageDelay)
                    .then(() => page.goto(url, {waitUntil: 'load', timeout: getTimeout()}))
                    .then(responseHandler)
                    .then(({data}: {data: {user: IgGraphQLUser}}) => jsonHandler(data));
                };
                return responseHandler(response)
                  .then(({graphql}: {graphql: {user: IgGraphQLUser}}) => jsonHandler(graphql));
              }).catch((err: Error) => {
                if (err.name !== 'TimeoutError' && err.name !== 'ResponseError') throw err;
                if (err.name === 'ResponseError') {
                  logger.warn(`error while fetching posts by @${userName}: ${err.message}`);
                } else logger.warn(`navigation timed out at ${getTimerTime()} ms`);
                return [] as string[];
              }).then(itemIds => promisify(setTimeout)(getTimeout()).then(() =>
                itemIds.map(id => this.lazyGetMediaById(id))
              ));
          }).finally(() => {
            // no page has been assigned if doOnNewPage failed before running its callback
            if (page) page.close();
          });
        });
        // an empty feed list must not turn the delay into a division by zero
        setTimeout(this.work, this.workInterval * 1000 / Math.max(1, this.lock.feed.length));
      }
    );
  };

  /** Installed by `launch`; resolves with lazy media items for posts newer than `targetId`. */
  public queryUserMedia: (username: string, targetId?: string) => Promise<LazyMediaItem[]>;

  /** Resolves with `<username>:<pk>` for an exact user name, logging in again if required. */
  public queryUser = (username: string) =>
    this.client.user.searchExact(username)
      .catch((error: IgClientError) => {
        if (error instanceof IgLoginRequiredError || error instanceof IgCookieNotFoundError) {
          logger.warn('login required, logging in again...');
          return this.session.login().then(() => this.client.user.searchExact(username));
        } else throw error;
      })
      .then(user => `${user.username}:${user.pk}`);

  private workOnMedia = (
    lazyMediaItems: LazyMediaItem[],
    sendMedia: (msg: string, text: string, author: string) => void
  ) => this.webshot(lazyMediaItems, sendMedia, this.webshotDelay);

  public urlSegmentToId = urlSegmentToId;

  /** Wraps a media ID into an item whose details are only requested when `item()` is called. */
  public lazyGetMediaById = (id: string): LazyMediaItem => ({
    pk: id,
    item: () => this.client.media.info(id).then(media => {
      const mediaItem = media.items[0] as MediaItem;
      logger.debug(`api returned media post ${JSON.stringify(mediaItem)} for query id=${id}`);
      return mediaItem;
    }),
  });

  /**
   * Returns a sender that pushes a message to every chat in `to`. Sending is retried on
   * failure; once `maxTrials` retries have failed, `author + text` is sent instead.
   */
  private sendMedia = (source?: string, ...to: IChat[]) => (msg: string, text: string, author: string) => {
    to.forEach(subscriber => {
      logger.info(`pushing data${source ? ` of ${source}` : ''} to ${JSON.stringify(subscriber)}`);
      retryOnError(
        () => this.bot.sendTo(subscriber, msg),
        (_, count, terminate: (doNothing: Promise<unknown>) => void) => {
          if (count <= maxTrials) {
            logger.warn(`retry sending to ${subscriber.chatID} for the ${ordinal(count)} time...`);
          } else {
            logger.warn(`${count - 1} consecutive failures while sending message chain, trying plain text instead...`);
            terminate(this.bot.sendTo(subscriber, author + text, true));
          }
        });
    });
  };

  /** Whether the current time lies within one of the `inactiveHours` ranges (`H:M-H:M`). */
  public get isInactiveTime() {
    const timeToEpoch = (h = 0, m = 0) => new Date().setHours(h, m, 0, 0);
    return this.inactiveHours
      .map(rangeStr => ((start, end) => ({start, end}))(
        ...rangeStr.split('-', 2).map(timeStr => timeToEpoch(...timeStr.split(':', 2).map(Number))) as [number, number?]
      ))
      .some(range => (now => now >= range.start && now < range.end)(Date.now()));
  }

  /**
   * Pulls the posts of one feed that are newer than the thread's offset, forwards them to
   * the thread's subscribers and then updates the thread's date and offset.
   */
  public workOnFeed = (feed: string) => new Promise<LazyMediaItem[]>(resolve => {
    const match = /https:\/\/www\.instagram\.com\/([^\/]+)/.exec(feed);
    if (!match) {
      logger.error(`current feed "${feed}" is invalid, please remove this feed manually`);
      return resolve([]);
    }
    return resolve(this.queryUserMedia(match[1], this.lock.threads[feed].offset)
      .catch((error: Error) => {
        logger.error(`error scraping media off profile page of ${match[1]}, error: ${error}`);
        return [];
      }));
  }).then(mediaItems => {
    const currentThread = this.lock.threads[feed];

    const updateDate = () => currentThread.updatedAt = new Date().toString();
    if (!mediaItems || mediaItems.length === 0) { updateDate(); return; }

    const topOfFeed = mediaItems[0].pk;
    const updateOffset = () => { currentThread.offset = topOfFeed; };

    // offset '-1': only record the newest post, send nothing
    if (currentThread.offset === '-1') { updateOffset(); return; }
    // offset '0': send the newest post only
    if (currentThread.offset === '0') mediaItems.splice(1);

    return this.workOnMedia(mediaItems, this.sendMedia(`thread ${feed}`, ...currentThread.subscribers))
      .then(updateDate).then(updateOffset);
  });

  /**
   * One polling cycle: drops feeds without subscribers, then works through the queued
   * feeds in batches of five and finally schedules itself again.
   */
  public work = () => {
    const lock = this.lock;
    if (this.workInterval < 1) this.workInterval = 1;
    if (this.isInactiveTime || lock.feed.length === 0) {
      // an empty feed list must not turn the delay into a division by zero
      setTimeout(this.work, this.workInterval * 1000 / Math.max(1, lock.feed.length));
      return;
    }

    // The index only advances when nothing was removed, so that the entry following a
    // removed one is checked as well.
    for (let index = 0; index < lock.feed.length;) {
      const feed = lock.feed[index];
      const thread = lock.threads[feed];
      if (thread && thread.subscribers && thread.subscribers.length > 0) {
        index++;
        continue;
      }
      logger.warn(`nobody subscribes thread ${feed}, removing from feed`);
      // threads are keyed by feed URL, not by position in the feed list
      delete lock.threads[feed];
      lock.feed.splice(index, 1);
      fs.writeFileSync(path.resolve(this.lockfile), JSON.stringify(lock));
    }

    const queuedFeeds = lock.feed.slice(0, (lock.workon + 1) || undefined).reverse();
    chainPromises(
      Arr.chunk(queuedFeeds, 5).map((arr, i) =>
        () => Promise.all(arr.map((currentFeed, j) => {
          const promiseDelay = this.workInterval * (Math.random() + j + 10 - arr.length) * 125 / lock.feed.length;

          const wait = (ms: number) => isWaitingForLogin ? neverResolves() : promisify(setTimeout)(ms);
          const startTime = new Date().getTime();
          const getTimerTime = () => new Date().getTime() - startTime;

          const workon = (queuedFeeds.length - 1) - (i * 5 + j);
          fs.writeFileSync(path.resolve(this.lockfile), JSON.stringify(lock));
          if (Date.now() - new Date(lock.threads[currentFeed].updatedAt).getTime() < 3600000) {
            logger.info(`skipped feed #${workon}: ${currentFeed}, last updated within an hour`);
            return wait(promiseDelay * 3);
          }
          return promisify(setTimeout)(promiseDelay * 3).then(() => {
            logger.info(`about to pull from feed #${workon}: ${currentFeed}`);
            if (j === arr.length - 1) logger.info(`timeout for this batch job: ${Math.trunc(promiseDelay)} ms`);
            const promise = this.workOnFeed(currentFeed).then(() => {
              lock.workon = workon - 1;
              if (j === arr.length - 1) {
                logger.info(`batch job #${workon}-${workon + j} completed after ${getTimerTime()} ms`);
              }
              fs.writeFileSync(path.resolve(this.lockfile), JSON.stringify(lock));
            });
            // do not let one feed hold up its batch longer than the allotted time
            return Promise.race([promise, wait(promiseDelay * 4)]);
          });
        }))
      )
    ).then(this.work);
  };
}