twitter.ts 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535
  1. import * as crypto from 'crypto';
  2. import * as fs from 'fs';
  3. import * as http from 'http';
  4. import * as path from 'path';
  5. import { parse as parseUrl } from 'url';
  6. import { promisify } from 'util';
  7. import {
  8. instagramIdToUrlSegment as idToUrlSegment,
  9. urlSegmentToInstagramId as urlSegmentToId
  10. } from 'instagram-id-to-url-segment';
  11. import {
  12. IgApiClient,
  13. IgClientError, IgExactUserNotFoundError, IgLoginTwoFactorRequiredError, IgResponseError,
  14. MediaInfoResponseItemsItem, UserFeedResponseItemsItem
  15. } from 'instagram-private-api';
  16. import { SocksProxyAgent } from 'socks-proxy-agent';
  17. import { getLogger } from './loggers';
  18. import QQBot from './koishi';
  19. import { BigNumOps } from './utils';
  20. import Webshot, { Cookies, Page } from './webshot';
  21. const parseLink = (link: string): { userName?: string, postUrlSegment?: string } => {
  22. let match =
  23. /instagram\.com\/p\/([A-Za-z0-9\-_]+)/.exec(link);
  24. if (match) return { postUrlSegment: match[1] };
  25. match =
  26. /instagram\.com\/([^\/?#]+)/.exec(link) ||
  27. /^([^\/?#]+)$/.exec(link);
  28. if (match) return { userName: ScreenNameNormalizer.normalize(match[1]).split(':')[0] };
  29. return;
  30. };
  31. const isValidUrlSegment = (input: string) => /^[A-Za-z0-9\-_]+$/.test(input);
  32. const linkBuilder = (config: ReturnType<typeof parseLink>): string => {
  33. if (config.userName) return `https://www.instagram.com/${config.userName}/`;
  34. if (config.postUrlSegment) return `https://www.instagram.com/p/${config.postUrlSegment}/`;
  35. };
  36. export { linkBuilder, parseLink, isValidUrlSegment, idToUrlSegment, urlSegmentToId };
  37. interface IWorkerOption {
  38. sessionLockfile: string;
  39. credentials: [string, string];
  40. codeServicePort: number;
  41. proxyUrl: string;
  42. lock: ILock;
  43. lockfile: string;
  44. webshotCookiesLockfile: string;
  45. bot: QQBot;
  46. inactiveHours: string[];
  47. workInterval: number;
  48. webshotDelay: number;
  49. mode: number;
  50. wsUrl: string;
  51. }
  52. export class SessionManager {
  53. private ig: IgApiClient;
  54. private username: string;
  55. private password: string;
  56. private lockfile: string;
  57. private codeServicePort: number;
  58. constructor(client: IgApiClient, file: string, credentials: [string, string], codeServicePort: number) {
  59. this.ig = client;
  60. this.lockfile = file;
  61. [this.username, this.password] = credentials;
  62. this.codeServicePort = codeServicePort;
  63. }
  64. public init = () => {
  65. this.ig.state.generateDevice(this.username);
  66. this.ig.request.end$.subscribe(() => { this.save(); });
  67. const filePath = path.resolve(this.lockfile);
  68. if (fs.existsSync(filePath)) {
  69. try {
  70. const serialized = JSON.parse(fs.readFileSync(filePath, 'utf8')) as { [key: string]: any };
  71. return this.ig.state.deserialize(serialized).then(() => {
  72. logger.info(`successfully loaded client session cookies for user ${this.username}`);
  73. });
  74. } catch (err) {
  75. logger.error(`failed to load client session cookies from file ${this.lockfile}: `, err);
  76. return Promise.resolve();
  77. }
  78. } else {
  79. return this.login().catch((err: IgClientError) => {
  80. logger.error(`error while trying to log in as user ${this.username}, error: ${err}`);
  81. logger.warn('attempting to retry after 1 minute...');
  82. if (fs.existsSync(filePath)) fs.unlinkSync(filePath);
  83. promisify(setTimeout)(60000).then(this.init);
  84. });
  85. }
  86. };
  87. public handle2FA = <T>(submitter: (code: string) => Promise<T>) => new Promise<T>((resolve, reject) => {
  88. const token = crypto.randomBytes(20).toString('hex');
  89. logger.info('please submit the code with a one-time token from your browser with this path:');
  90. logger.info(`/confirm-2fa?code=<the code you received>&token=${token}`);
  91. let working;
  92. const server = http.createServer((req, res) => {
  93. const {pathname, query} = parseUrl(req.url, true);
  94. if (!working && pathname === '/confirm-2fa' && query.token === token &&
  95. typeof(query.code) === 'string' && /^\d{6}$/.test(query.code)) {
  96. const code = query.code;
  97. logger.debug(`received code: ${code}`);
  98. working = true;
  99. submitter(code)
  100. .then(response => { res.write('OK'); res.end(); server.close(() => resolve(response)); })
  101. .catch(err => { res.write('Error'); res.end(); reject(err); })
  102. .finally(() => { working = false; });
  103. }
  104. });
  105. server.listen(this.codeServicePort);
  106. });
  107. public login = () =>
  108. this.ig.simulate.preLoginFlow()
  109. .then(() => this.ig.account.login(this.username, this.password))
  110. .catch((err: IgClientError) => {
  111. if (err instanceof IgLoginTwoFactorRequiredError) {
  112. const {two_factor_identifier, totp_two_factor_on} = err.response.body.two_factor_info;
  113. logger.debug(`2FA info: ${JSON.stringify(err.response.body.two_factor_info)}`);
  114. logger.info(`login is requesting two-factor authentication via ${totp_two_factor_on ? 'TOTP' : 'SMS'}`);
  115. return this.handle2FA(code => this.ig.account.twoFactorLogin({
  116. username: this.username,
  117. verificationCode: code,
  118. twoFactorIdentifier: two_factor_identifier,
  119. verificationMethod: totp_two_factor_on ? '0' : '1',
  120. }));
  121. }
  122. throw err;
  123. })
  124. .then(user => new Promise<typeof user>(resolve => {
  125. logger.info(`successfully logged in as ${this.username}`);
  126. process.nextTick(() => resolve(this.ig.simulate.postLoginFlow().then(() => user)));
  127. }));
  128. public save = () =>
  129. this.ig.state.serialize()
  130. .then((serialized: { [key: string]: any }) => {
  131. delete serialized.constants;
  132. return fs.writeFileSync(path.resolve(this.lockfile), JSON.stringify(serialized, null, 2), 'utf-8');
  133. });
  134. }
  135. export class ScreenNameNormalizer {
  136. // tslint:disable-next-line: variable-name
  137. public static _queryUser: (username: string) => Promise<string>;
  138. public static normalize = (username: string) => `${username.toLowerCase().replace(/^@/, '')}:`;
  139. public static async normalizeLive(username: string) {
  140. if (this._queryUser) {
  141. return await this._queryUser(username)
  142. .catch((err: IgClientError) => {
  143. if (!(err instanceof IgExactUserNotFoundError)) {
  144. logger.warn(`error looking up user: ${err.message}`);
  145. return `${username}:`;
  146. }
  147. return null;
  148. });
  149. }
  150. return this.normalize(username);
  151. }
  152. }
  153. let browserLogin = (page: Page): Promise<void> => Promise.reject();
  154. let browserSaveCookies = browserLogin;
  155. let isWaitingForLogin = false;
  156. const acceptCookieConsent = (page: Page) =>
  157. page.click('button:has-text("すべて許可")', { timeout: 5000 })
  158. .then(() => logger.info('accepted cookie consent'))
  159. .catch((err: Error) => { if (err.name !== 'TimeoutError') throw err; });
  160. export const WebshotHelpers = {
  161. handleLogin: browserLogin,
  162. handleCookieConsent: acceptCookieConsent,
  163. get isWaitingForLogin() { return isWaitingForLogin; },
  164. };
  165. export let getPostOwner = (segmentId: string): Promise<string> => Promise.reject();
  166. export let sendPost = (segmentId: string, receiver: IChat): void => {
  167. throw Error();
  168. };
  169. export type MediaItem = MediaInfoResponseItemsItem & UserFeedResponseItemsItem;
  170. export type LazyMediaItem = {
  171. pk: string,
  172. item: () => Promise<MediaItem>,
  173. };
  174. const logger = getLogger('instagram');
  175. const maxTrials = 3;
  176. const retryInterval = 1500;
  177. const ordinal = (n: number) => {
  178. switch ((Math.trunc(n / 10) % 10 === 1) ? 0 : n % 10) {
  179. case 1:
  180. return `${n}st`;
  181. case 2:
  182. return `${n}nd`;
  183. case 3:
  184. return `${n}rd`;
  185. default:
  186. return `${n}th`;
  187. }
  188. };
  189. const retryOnError = <T, U>(
  190. doWork: () => Promise<T>,
  191. onRetry: (error, count: number, terminate: (defaultValue: U) => void) => void
  192. ) => new Promise<T | U>(resolve => {
  193. const retry = (reason, count: number) => {
  194. setTimeout(() => {
  195. let terminate = false;
  196. onRetry(reason, count, defaultValue => { terminate = true; resolve(defaultValue); });
  197. if (!terminate) doWork().then(resolve).catch(error => retry(error, count + 1));
  198. }, retryInterval);
  199. };
  200. doWork().then(resolve).catch(error => retry(error, 1));
  201. });
  202. export default class {
  203. private client: IgApiClient;
  204. private lock: ILock;
  205. private lockfile: string;
  206. private inactiveHours: string[];
  207. private workInterval: number;
  208. private bot: QQBot;
  209. private webshotDelay: number;
  210. private webshotCookies: Cookies = [];
  211. private webshotCookiesLockfile: string;
  212. private webshot: Webshot;
  213. private mode: number;
  214. private wsUrl: string;
  215. public session: SessionManager;
  216. constructor(opt: IWorkerOption) {
  217. this.client = new IgApiClient();
  218. if (opt.proxyUrl) {
  219. try {
  220. const url = new URL(opt.proxyUrl);
  221. if (!/^socks(?:4a?|5h?)?:$/.test(url.protocol)) throw Error();
  222. if (!url.port) url.port = '1080';
  223. this.client.request.defaults.agent = new SocksProxyAgent({
  224. hostname: url.hostname,
  225. port: url.port,
  226. userId: url.username,
  227. password: url.password,
  228. });
  229. } catch (e) {
  230. logger.warn(`invalid socks proxy url: ${opt.proxyUrl}, ignoring`);
  231. }
  232. }
  233. this.session = new SessionManager(this.client, opt.sessionLockfile, opt.credentials, opt.codeServicePort);
  234. this.lockfile = opt.lockfile;
  235. this.webshotCookiesLockfile = opt.webshotCookiesLockfile;
  236. this.lock = opt.lock;
  237. this.inactiveHours = opt.inactiveHours;
  238. this.workInterval = opt.workInterval;
  239. this.bot = opt.bot;
  240. this.webshotDelay = opt.webshotDelay;
  241. this.mode = opt.mode;
  242. this.wsUrl = opt.wsUrl;
  243. const cookiesFilePath = path.resolve(this.webshotCookiesLockfile);
  244. try {
  245. this.webshotCookies = JSON.parse(fs.readFileSync(cookiesFilePath, 'utf8')) as Cookies;
  246. logger.info(`loaded webshot cookies from file ${this.webshotCookiesLockfile}`);
  247. } catch (err) {
  248. logger.warn(
  249. `failed to load webshot cookies from file ${this.webshotCookiesLockfile}: `,
  250. (err as Error).message
  251. );
  252. logger.warn('cookies will be saved to this file when needed');
  253. }
  254. browserLogin = page =>
  255. page.fill('input[name="username"]', opt.credentials[0], {timeout: 0})
  256. .then(() => { isWaitingForLogin = true; logger.warn('blocked by login dialog, trying to log in manually...'); })
  257. .then(() => page.fill('input[name="password"]', opt.credentials[1], {timeout: 0}))
  258. .then(() => page.click('button[type="submit"]', {timeout: 0}))
  259. .then(() =>
  260. (next => Promise.race([
  261. page.waitForSelector('#verificationCodeDescription', {timeout: 0}).then(handle => handle.innerText()).then(text => {
  262. logger.info(`login is requesting two-factor authentication via ${/認証アプリ/.test(text) ? 'TOTP' : 'SMS'}`);
  263. return this.session.handle2FA(code => page.fill('input[name="verificationCode"]', code, {timeout: 0}))
  264. .then(() => page.click('button:has-text("実行")', {timeout: 0}))
  265. .then(next);
  266. }),
  267. page.waitForResponse(res => res.status() === 429, {timeout: 0})
  268. .then(() => { logger.error('fatal error: login restricted: code 429, exiting'); process.exit(1); }),
  269. next(),
  270. ]))(() => page.click('button:has-text("情報を保存")', {timeout: 0}).then(() => { isWaitingForLogin = false; }))
  271. );
  272. browserSaveCookies = page =>
  273. page.context().cookies()
  274. .then(cookies => {
  275. this.webshotCookies = cookies;
  276. logger.info('successfully logged in, saving cookies to file...');
  277. fs.writeFileSync(path.resolve(this.webshotCookiesLockfile), JSON.stringify(cookies, null, 2), 'utf-8');
  278. });
  279. WebshotHelpers.handleLogin = page =>
  280. browserLogin(page)
  281. .then(() => page.waitForSelector('img[data-testid="user-avatar"]', { timeout: this.webshotDelay }))
  282. .then(() => browserSaveCookies(page))
  283. .catch((err: Error) => {
  284. if (err.name === 'TimeoutError') { logger.warn('navigation timed out, assuming login has failed'); isWaitingForLogin = false; }
  285. throw err;
  286. });
  287. ScreenNameNormalizer._queryUser = this.queryUser;
  288. const parseMediaError = (err: IgClientError) => {
  289. if (!(err instanceof IgResponseError && err.text === 'Media not found or unavailable')) {
  290. logger.warn(`error retrieving instagram media: ${err.message}`);
  291. return `获取媒体时出现错误:${err.message}`;
  292. }
  293. return '找不到请求的媒体,它可能已被删除。';
  294. };
  295. getPostOwner = (segmentId) =>
  296. this.client.media.info(urlSegmentToId(segmentId))
  297. .then(media => media.items[0].user)
  298. .then(user => `${user.username}:${user.pk}`)
  299. .catch((err: IgClientError) => { throw Error(parseMediaError(err)); });
  300. sendPost = (segmentId, receiver) => {
  301. this.getMedia(segmentId, this.sendMedia(`instagram media ${segmentId}`, receiver))
  302. .catch((err: IgClientError) => { this.bot.sendTo(receiver, parseMediaError(err)); });
  303. };
  304. }
  305. public launch = () => {
  306. this.webshot = new Webshot(
  307. this.wsUrl,
  308. this.mode,
  309. () => this.webshotCookies,
  310. doOnNewPage => {
  311. this.queryUserMedia = ((userName, targetId) => {
  312. let page: Page;
  313. const url = linkBuilder({ userName });
  314. logger.debug(`pulling ${targetId !== '0' ? `feed ${url} up to ${targetId}` : `top of feed ${url}`}...`);
  315. return doOnNewPage(newPage => {
  316. page = newPage;
  317. let timeout = this.workInterval * 1000;
  318. const startTime = new Date().getTime();
  319. const getTimerTime = () => new Date().getTime() - startTime;
  320. const getTimeout = () => isWaitingForLogin ? 0 : Math.max(90000, timeout - getTimerTime());
  321. return page.context().addCookies(this.webshotCookies)
  322. .then(() => page.goto(url, { waitUntil: 'load', timeout: getTimeout() }))
  323. .then(response => {
  324. if (response.status() !== 200) {
  325. const err = new Error(
  326. `error navigating to user page, error was: ${response.status()} ${response.statusText()}`
  327. );
  328. throw Object.defineProperty(err, 'name', {
  329. value: 'ResponseError',
  330. });
  331. }
  332. }).then(() => acceptCookieConsent(page))
  333. .then(() =>
  334. (next => Promise.race([
  335. browserLogin(page)
  336. .catch((err: Error) => {
  337. if (err.name === 'TimeoutError') { logger.warn('navigation timed out, assuming login has failed'); isWaitingForLogin = false; }
  338. throw err;
  339. })
  340. .then(() => browserSaveCookies(page))
  341. .then(() => page.goto(url)).then(next),
  342. next(),
  343. ]))(() => promisify(setTimeout)(2000).then(() => page.waitForSelector('article', {timeout: getTimeout()})))
  344. ).then(handle => {
  345. const postHandler = () => {
  346. const toId = (href: string) => urlSegmentToId((/\/p\/(.*)\/$/.exec(href) ?? [,''])[1]);
  347. if (targetId === '0') {
  348. return handle.$$eval('a', as =>
  349. as.filter(a => !a.querySelector('[aria-label="IGTV"]'))[0].href
  350. ).then(href => Number(toId(href)) > 0 ? [toId(href)] : []);
  351. }
  352. return handle.$$eval('a', as =>
  353. as.filter(a => !a.querySelector('[aria-label="IGTV"]')).map(a => a.href)
  354. ).then(hrefs => {
  355. let id: string;
  356. const itemIds: string[] = [];
  357. for (const href of hrefs) {
  358. id = toId(href);
  359. if (id && BigNumOps.compare(id, targetId) > 0) itemIds.push(id);
  360. else return itemIds;
  361. }
  362. logger.info('unable to find a smaller id than target, trying on next page...');
  363. return null; // has more
  364. });
  365. };
  366. return postHandler().then(itemIds => {
  367. if (itemIds) return itemIds;
  368. timeout += this.workInterval * 500;
  369. return handle.$$('a')
  370. .then(as => { as.pop().scrollIntoViewIfNeeded(); return as.length + 1; })
  371. .then(loadedCount => page.waitForFunction(count =>
  372. document.querySelectorAll('article a').length > count
  373. , loadedCount))
  374. .then(postHandler);
  375. });
  376. }).catch((err: Error) => {
  377. if (err.name !== 'TimeoutError' && err.name !== 'ResponseError') throw err;
  378. if (err.name === 'ResponseError') {
  379. logger.warn(`error while fetching tweets for ${userName}: ${err.message}`);
  380. } else logger.warn(`navigation timed out at ${getTimerTime()} ms`);
  381. return [] as string[];
  382. }).then(itemIds => promisify(setTimeout)(getTimeout()).then(() =>
  383. itemIds.map(id => this.lazyGetMediaById(id))
  384. ));
  385. }).finally(() => { page.close(); });
  386. });
  387. setTimeout(this.work, this.workInterval * 1000);
  388. }
  389. );
  390. };
  391. public queryUserMedia: (username: string, targetId?: string) => Promise<LazyMediaItem[]>;
  392. public queryUser = (username: string) => this.client.user.searchExact(username)
  393. .then(user => `${user.username}:${user.pk}`);
  394. private workOnMedia = (
  395. lazyMediaItems: LazyMediaItem[],
  396. sendMedia: (msg: string, text: string, author: string) => void
  397. ) => this.webshot(lazyMediaItems, sendMedia, this.webshotDelay);
  398. public urlSegmentToId = urlSegmentToId;
  399. public lazyGetMediaById = (id: string): LazyMediaItem => ({
  400. pk: id,
  401. item: () => this.client.media.info(id).then(media => {
  402. const mediaItem = media.items[0] as MediaItem;
  403. logger.debug(`api returned media post ${JSON.stringify(mediaItem)} for query id=${id}`);
  404. return mediaItem;
  405. }),
  406. });
  407. private getMedia = (segmentId: string, sender: (msg: string, text: string, author: string) => void) =>
  408. this.workOnMedia([this.lazyGetMediaById(urlSegmentToId(segmentId))], sender);
  409. private sendMedia = (source?: string, ...to: IChat[]) => (msg: string, text: string, author: string) => {
  410. to.forEach(subscriber => {
  411. logger.info(`pushing data${source ? ` of ${source}` : ''} to ${JSON.stringify(subscriber)}`);
  412. retryOnError(
  413. () => this.bot.sendTo(subscriber, msg),
  414. (_, count, terminate: (doNothing: Promise<void>) => void) => {
  415. if (count <= maxTrials) {
  416. logger.warn(`retry sending to ${subscriber.chatID} for the ${ordinal(count)} time...`);
  417. } else {
  418. logger.warn(`${count - 1} consecutive failures while sending message chain, trying plain text instead...`);
  419. terminate(this.bot.sendTo(subscriber, author + text, true));
  420. }
  421. });
  422. });
  423. };
  424. public get isInactiveTime() {
  425. const timeToEpoch = (h = 0, m = 0) => new Date().setHours(h, m, 0, 0);
  426. return this.inactiveHours
  427. .map(rangeStr => ((start, end) => ({start, end}))(
  428. ...rangeStr.split('-', 2).map(timeStr => timeToEpoch(...timeStr.split(':', 2).map(Number))) as [number, number?]
  429. ))
  430. .some(range => (now => now >= range.start && now < range.end)(Date.now()));
  431. }
  432. public work = () => {
  433. const lock = this.lock;
  434. if (this.workInterval < 1) this.workInterval = 1;
  435. if (this.isInactiveTime || lock.feed.length === 0) {
  436. setTimeout(this.work, this.workInterval * 1000);
  437. return;
  438. }
  439. if (lock.workon >= lock.feed.length) lock.workon = 0;
  440. if (!lock.threads[lock.feed[lock.workon]] ||
  441. !lock.threads[lock.feed[lock.workon]].subscribers ||
  442. lock.threads[lock.feed[lock.workon]].subscribers.length === 0) {
  443. logger.warn(`nobody subscribes thread ${lock.feed[lock.workon]}, removing from feed`);
  444. delete lock.threads[lock.feed[lock.workon]];
  445. lock.feed.splice(lock.workon, 1);
  446. fs.writeFileSync(path.resolve(this.lockfile), JSON.stringify(lock));
  447. this.work();
  448. return;
  449. }
  450. const currentFeed = lock.feed[lock.workon];
  451. const promise = new Promise<LazyMediaItem[]>(resolve => {
  452. const match = /https:\/\/www\.instagram\.com\/([^\/]+)/.exec(currentFeed);
  453. if (!match) {
  454. logger.error(`current feed "${currentFeed}" is invalid, please remove this feed manually`);
  455. return resolve([]);
  456. }
  457. this.queryUserMedia(match[1], this.lock.threads[currentFeed].offset)
  458. .then(resolve)
  459. .catch((error: Error) => {
  460. logger.error(`error scraping media off profile page of ${match[1]}, error: ${error}`);
  461. resolve([]);
  462. });
  463. });
  464. promise.then((mediaItems: LazyMediaItem[]) => {
  465. const currentThread = lock.threads[currentFeed];
  466. const updateDate = () => currentThread.updatedAt = new Date().toString();
  467. if (!mediaItems || mediaItems.length === 0) { updateDate(); return; }
  468. const topOfFeed = mediaItems[0].pk;
  469. const updateOffset = () => currentThread.offset = topOfFeed;
  470. if (currentThread.offset === '-1') { updateOffset(); return; }
  471. if (currentThread.offset === '0') mediaItems.splice(1);
  472. return this.workOnMedia(mediaItems, this.sendMedia(`thread ${currentFeed}`, ...currentThread.subscribers))
  473. .then(updateDate).then(updateOffset);
  474. })
  475. .then(() => {
  476. lock.workon++;
  477. let timeout = this.workInterval * 1000 / lock.feed.length;
  478. if (timeout < 1000) timeout = 1000;
  479. fs.writeFileSync(path.resolve(this.lockfile), JSON.stringify(lock));
  480. setTimeout(() => {
  481. this.work();
  482. }, timeout);
  483. });
  484. };
  485. }