123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477 |
- "use strict";
/**
 * Compiler-emitted async helper: drives a generator function as if it were an
 * `async` function and returns a promise for its final value.
 *
 * @param thisArg    `this` value the generator is invoked with
 * @param _arguments arguments forwarded to the generator (may be undefined)
 * @param P          promise constructor to use; falls back to the global Promise
 * @param generator  generator function whose `yield`s act as `await`s
 * @returns a promise settled with the generator's return value or thrown error
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap non-promise values so that every yielded value can be chained.
    function adopt(value) {
        if (value instanceof P) {
            return value;
        }
        return new P(function (resolve) { resolve(value); });
    }
    return new (P || (P = Promise))(function (resolve, reject) {
        // Either finish with the generator's return value or wait for the yielded one.
        function step(result) {
            if (result.done) {
                resolve(result.value);
                return;
            }
            adopt(result.value).then(fulfilled, rejected);
        }
        // Resume the generator with the awaited value.
        function fulfilled(value) {
            try {
                step(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        }
        // Resume the generator by throwing the rejection reason into it.
        function rejected(value) {
            try {
                step(generator["throw"](value));
            }
            catch (e) {
                reject(e);
            }
        }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
- Object.defineProperty(exports, "__esModule", { value: true });
- exports.sendPost = exports.getPostOwner = exports.WebshotHelpers = exports.ScreenNameNormalizer = exports.SessionManager = exports.urlSegmentToId = exports.idToUrlSegment = exports.parseLink = exports.linkBuilder = exports.graphqlLinkBuilder = void 0;
- const crypto = require("crypto");
- const fs = require("fs");
- const http = require("http");
- const path = require("path");
- const url_1 = require("url");
- const util_1 = require("util");
- const instagram_id_to_url_segment_1 = require("instagram-id-to-url-segment");
- Object.defineProperty(exports, "idToUrlSegment", { enumerable: true, get: function () { return instagram_id_to_url_segment_1.instagramIdToUrlSegment; } });
- const instagram_private_api_1 = require("instagram-private-api");
- const socks_proxy_agent_1 = require("socks-proxy-agent");
- const loggers_1 = require("./loggers");
- const utils_1 = require("./utils");
- const webshot_1 = require("./webshot");
/**
 * Interprets an Instagram link or a bare screen name.
 *
 * @param link URL or screen name to interpret
 * @returns `{ postUrlSegment }` for a `/p/<segment>` link, `{ userName }` for a
 *          profile link or bare name, or `undefined` when nothing matches
 */
const parseLink = (link) => {
    const postMatch = /instagram\.com\/p\/([A-Za-z0-9\-_]+)/.exec(link);
    if (postMatch) {
        return { postUrlSegment: postMatch[1] };
    }
    // A profile URL is tried first, then a bare name without '/', '?' or '#'.
    const userMatch = /instagram\.com\/([^\/?#]+)/.exec(link) || /^([^\/?#]+)$/.exec(link);
    if (!userMatch) {
        return undefined;
    }
    // normalize() appends a ':' separator; only the part before it is the name.
    const [userName] = ScreenNameNormalizer.normalize(userMatch[1]).split(':');
    return { userName };
};
- exports.parseLink = parseLink;
/**
 * Builds the canonical instagram.com URL for a profile or a post.
 *
 * @param config object carrying `userName` and/or `postUrlSegment`
 * @returns the profile URL when `userName` is set, otherwise the post URL when
 *          `postUrlSegment` is set, otherwise `undefined`
 */
const linkBuilder = (config) => {
    const { userName, postUrlSegment } = config;
    if (userName) {
        return `https://www.instagram.com/${userName}/`;
    }
    return postUrlSegment ? `https://www.instagram.com/p/${postUrlSegment}/` : undefined;
};
- exports.linkBuilder = linkBuilder;
/**
 * Builds the GraphQL query URL used to page through a user's timeline media.
 *
 * Every value is percent-encoded: pagination cursors are opaque strings and may
 * contain characters such as '+', '/', '&' or '=' that would otherwise corrupt
 * the query string. Purely alphanumeric values produce the same URL as before.
 *
 * @param userId id placed in the `id` query parameter
 * @param first  value of the `first` query parameter, '12' by default
 * @param after  optional cursor; the `after` parameter is omitted when falsy
 * @returns the query URL as a string
 */
const graphqlLinkBuilder = ({ userId, first = '12', after }) => {
    const query = [
        'query_id=17888483320059182',
        `id=${encodeURIComponent(userId)}`,
        `first=${encodeURIComponent(first)}`,
    ];
    if (after) {
        query.push(`after=${encodeURIComponent(after)}`);
    }
    return `https://www.instagram.com/graphql/query/?${query.join('&')}`;
};
- exports.graphqlLinkBuilder = graphqlLinkBuilder;
/**
 * Converts a post URL segment into its numeric media id.
 *
 * Segments longer than 28 characters have their last 28 characters dropped
 * before conversion (presumably an extra suffix on some share links - confirm).
 *
 * @param urlSegment the segment taken from a `/p/<segment>` link
 * @returns whatever `urlSegmentToInstagramId` returns for the (trimmed) segment
 */
const urlSegmentToId = (urlSegment) => {
    const convertible = urlSegment.length > 28 ? urlSegment.slice(0, -28) : urlSegment;
    return instagram_id_to_url_segment_1.urlSegmentToInstagramId(convertible);
};
- exports.urlSegmentToId = urlSegmentToId;
/**
 * Owns the login state of the private-API client: restores a serialized
 * session from disk, logs in (including two-factor authentication) when no
 * session file exists, and writes the session back after requests finish.
 */
class SessionManager {
    /**
     * @param client          API client whose `state`, `request`, `simulate`
     *                        and `account` members are used
     * @param file            path of the file holding the serialized session
     * @param credentials     `[username, password]`
     * @param codeServicePort port the temporary 2FA confirmation server listens on
     */
    constructor(client, file, credentials, codeServicePort) {
        // Set once the save-on-request hook is installed, so that login retries
        // (which call init again) do not stack additional subscriptions.
        this.hasSaveHook = false;
        /**
         * Prepares the client: loads the session file when it exists, otherwise
         * logs in. A failed login is retried after one minute in the background;
         * the promise returned for the failed attempt still resolves.
         */
        this.init = () => {
            this.ig.state.generateDevice(this.username);
            if (!this.hasSaveHook) {
                this.hasSaveHook = true;
                this.ig.request.end$.subscribe(() => {
                    // Saving is best effort: report the failure instead of
                    // leaving an unhandled rejection behind.
                    this.save().catch((err) => {
                        logger.error(`failed to save client session to file ${this.lockfile}: ${err}`);
                    });
                });
            }
            const filePath = path.resolve(this.lockfile);
            if (fs.existsSync(filePath)) {
                try {
                    const serialized = JSON.parse(fs.readFileSync(filePath, 'utf8'));
                    return this.ig.state.deserialize(serialized).then(() => {
                        logger.info(`successfully loaded client session cookies for user ${this.username}`);
                    });
                }
                catch (err) {
                    logger.error(`failed to load client session cookies from file ${this.lockfile}: `, err);
                    return Promise.resolve();
                }
            }
            else {
                return this.login().catch((err) => {
                    logger.error(`error while trying to log in as user ${this.username}, error: ${err}`);
                    logger.warn('attempting to retry after 1 minute...');
                    // A request made during the failed attempt may have written a
                    // session file; remove it so that the retry logs in again.
                    if (fs.existsSync(filePath))
                        fs.unlinkSync(filePath);
                    util_1.promisify(setTimeout)(60000).then(this.init);
                });
            }
        };
        /**
         * Starts a temporary HTTP server and waits for the operator to submit a
         * six-digit code at `/confirm-2fa?code=...&token=...`.
         *
         * Every request receives a response, and the server is closed on both
         * success and failure so that the port is free for a later attempt.
         *
         * @param submitter receives the code and returns a promise for the result
         * @returns a promise resolved with the submitter's result, or rejected
         *          with its error or with a server error (e.g. port in use)
         */
        this.handle2FA = (submitter) => new Promise((resolve, reject) => {
            const token = crypto.randomBytes(20).toString('hex');
            logger.info('please submit the code with a one-time token from your browser with this path:');
            logger.info(`/confirm-2fa?code=<the code you received>&token=${token}`);
            let working = false;
            const server = http.createServer((req, res) => {
                // Do not keep sockets alive: server.close() only completes once
                // every connection has ended.
                res.setHeader('Connection', 'close');
                const { pathname, query } = url_1.parse(req.url, true);
                const isValid = pathname === '/confirm-2fa' && query.token === token &&
                    typeof (query.code) === 'string' && /^\d{6}$/.test(query.code);
                if (!isValid) {
                    res.statusCode = 400;
                    res.end('Error');
                    return;
                }
                if (working) {
                    // A previous submission is still being processed.
                    res.statusCode = 503;
                    res.end('Error');
                    return;
                }
                const code = query.code;
                logger.debug(`received code: ${code}`);
                working = true;
                submitter(code)
                    .then(response => { res.write('OK'); res.end(); server.close(() => resolve(response)); })
                    .catch(err => { res.write('Error'); res.end(); server.close(); reject(err); })
                    .finally(() => { working = false; });
            });
            // Without a listener a failed listen() is thrown as an uncaught exception.
            server.on('error', reject);
            server.listen(this.codeServicePort);
        });
        /**
         * Logs in with the stored credentials, going through two-factor
         * authentication when the API asks for it.
         *
         * @returns a promise for the logged-in user, resolved after the
         *          post-login flow has completed
         */
        this.login = () => this.ig.simulate.preLoginFlow()
            .then(() => this.ig.account.login(this.username, this.password))
            .catch((err) => {
                if (err instanceof instagram_private_api_1.IgLoginTwoFactorRequiredError) {
                    const { two_factor_identifier, totp_two_factor_on } = err.response.body.two_factor_info;
                    logger.debug(`2FA info: ${JSON.stringify(err.response.body.two_factor_info)}`);
                    logger.info(`login is requesting two-factor authentication via ${totp_two_factor_on ? 'TOTP' : 'SMS'}`);
                    return this.handle2FA(code => this.ig.account.twoFactorLogin({
                        username: this.username,
                        verificationCode: code,
                        twoFactorIdentifier: two_factor_identifier,
                        verificationMethod: totp_two_factor_on ? '0' : '1',
                    }));
                }
                throw err;
            })
            .then(user => new Promise(resolve => {
                logger.info(`successfully logged in as ${this.username}`);
                // The post-login flow is started on the next tick, as in the original.
                process.nextTick(() => resolve(this.ig.simulate.postLoginFlow().then(() => user)));
            }));
        /**
         * Serializes the client state (without its `constants` entry) and
         * writes it to the session file as indented JSON.
         */
        this.save = () => this.ig.state.serialize()
            .then((serialized) => {
                delete serialized.constants;
                return fs.writeFileSync(path.resolve(this.lockfile), JSON.stringify(serialized, null, 2), 'utf-8');
            });
        this.ig = client;
        this.lockfile = file;
        [this.username, this.password] = credentials;
        this.codeServicePort = codeServicePort;
    }
}
- exports.SessionManager = SessionManager;
/**
 * Turns user-supplied screen names into the `name:` / `name:id` form used by
 * the rest of the module.
 */
class ScreenNameNormalizer {
    /**
     * Resolves a screen name through the live lookup when one is installed on
     * `_queryUser`, otherwise falls back to the offline `normalize`.
     *
     * @param username screen name to resolve
     * @returns a promise for the lookup result; `null` when the lookup fails
     *          with IgExactUserNotFoundError; `"<username>:"` on any other
     *          lookup error
     */
    static normalizeLive(username) {
        return __awaiter(this, void 0, void 0, function* () {
            if (!this._queryUser) {
                return this.normalize(username);
            }
            const onLookupError = (err) => {
                if (err instanceof instagram_private_api_1.IgExactUserNotFoundError) {
                    return null;
                }
                logger.warn(`error looking up user: ${err.message}`);
                return `${username}:`;
            };
            return yield this._queryUser(username).catch(onLookupError);
        });
    }
}
- exports.ScreenNameNormalizer = ScreenNameNormalizer;
/**
 * Offline normalization: lower-cases the name, strips one leading '@' and
 * appends the ':' separator (with nothing after it).
 */
ScreenNameNormalizer.normalize = (username) => {
    const bareName = username.toLowerCase().replace(/^@/, '');
    return `${bareName}:`;
};
// Module-level browser login state. Both handlers start as the same no-op and
// are replaced when the worker class in this file is constructed.
let isWaitingForLogin = false;
let browserLogin = (page) => Promise.resolve();
let browserSaveCookies = browserLogin;
/**
 * Clicks the cookie-consent button on the page, waiting at most five seconds.
 *
 * @param page page object exposing `click(selector, options)`
 * @returns a promise that resolves when the button was clicked or the click
 *          timed out, and rejects with any error not named 'TimeoutError'
 */
const acceptCookieConsent = (page) => {
    // A timeout only means the dialog was not shown; anything else is a real error.
    const ignoreTimeout = (err) => {
        if (err.name === 'TimeoutError') {
            return;
        }
        throw err;
    };
    return page.click('button:has-text("すべて許可")', { timeout: 5000 })
        .then(() => logger.info('accepted cookie consent'))
        .catch(ignoreTimeout);
};
- exports.WebshotHelpers = {
- handleLogin: browserLogin,
- handleCookieConsent: acceptCookieConsent,
- get isWaitingForLogin() { return isWaitingForLogin; },
- };
// Placeholder used until the worker constructor installs the real
// implementation on `exports.getPostOwner`. It rejects with a descriptive
// Error (instead of `undefined`) so that callers reading `err.message` work.
let getPostOwner = (segmentId) => Promise.reject(new Error('instagram worker has not been initialized'));
- exports.getPostOwner = getPostOwner;
// Placeholder used until the worker constructor installs the real
// implementation on `exports.sendPost`. It still throws synchronously, now
// with a message that explains why.
let sendPost = (segmentId, receiver) => {
    throw Error('instagram worker has not been initialized');
};
- exports.sendPost = sendPost;
// Number of retries logged as plain retries before the sender falls back to
// plain text (see sendMedia in the worker class).
const maxTrials = 3;
// Delay between two attempts in retryOnError, in milliseconds.
const retryInterval = 1500;
// Logger shared by everything in this module.
const logger = loggers_1.getLogger('instagram');
/**
 * Formats a number with its English ordinal suffix (1st, 2nd, 3rd, 4th, ...).
 * Numbers whose tens digit is 1 (11, 12, 13, 111, ...) always take "th".
 *
 * @param n number to format
 * @returns `n` followed by its suffix
 */
const ordinal = (n) => {
    const hasTeenTens = Math.trunc(n / 10) % 10 === 1;
    const suffixByLastDigit = { 1: 'st', 2: 'nd', 3: 'rd' };
    const suffix = (!hasTeenTens && suffixByLastDigit[n % 10]) || 'th';
    return `${n}${suffix}`;
};
/**
 * Runs `doWork` and keeps retrying it after each rejection until it succeeds
 * or `onRetry` terminates the loop.
 *
 * @param doWork   function returning a promise for the result
 * @param onRetry  called before each retry as `(reason, count, terminate)`;
 *                 calling `terminate(value)` stops retrying and resolves the
 *                 returned promise with `value`
 * @param interval delay before each retry in milliseconds; defaults to the
 *                 module-wide `retryInterval`
 * @returns a promise resolved with the result of the first successful
 *          `doWork` call or with the value passed to `terminate`
 */
const retryOnError = (doWork, onRetry, interval = retryInterval) => new Promise(resolve => {
    const scheduleRetry = (reason, count) => {
        setTimeout(() => {
            let terminated = false;
            onRetry(reason, count, defaultValue => { terminated = true; resolve(defaultValue); });
            if (!terminated)
                doWork().then(resolve).catch(error => scheduleRetry(error, count + 1));
        }, interval);
    };
    doWork().then(resolve).catch(error => scheduleRetry(error, 1));
});
- class default_1 {
- constructor(opt) {
- this.webshotCookies = [];
- this.launch = () => {
- this.webshot = new webshot_1.default(this.wsUrl, this.mode, () => this.webshotCookies, doOnNewPage => {
- this.queryUserMedia = ((userName, targetId) => {
- let page;
- let url = linkBuilder({ userName }) + '?__a=1';
- logger.debug(`pulling ${targetId !== '0' ? `feed ${url} up to ${targetId}` : `top of feed ${url}`}...`);
- return doOnNewPage(newPage => {
- page = newPage;
- let timeout = this.webshotDelay / 2;
- const startTime = new Date().getTime();
- const getTimerTime = () => new Date().getTime() - startTime;
- const getTimeout = () => isWaitingForLogin ? 0 : Math.max(5000, timeout - getTimerTime());
- return page.context().addCookies(this.webshotCookies)
- .then(() => page.goto(url, { waitUntil: 'load', timeout: getTimeout() }))
- .then(response => {
- const itemIds = [];
- const redirectionHandler = () => acceptCookieConsent(page)
- .then(() => browserLogin(page))
- .catch((err) => {
- if (err.name === 'TimeoutError') {
- logger.warn('navigation timed out, assuming login has failed');
- isWaitingForLogin = false;
- }
- throw err;
- })
- .then(() => browserSaveCookies(page))
- .then(() => page.goto(url, { waitUntil: 'load', timeout: getTimeout() }))
- .then(responseHandler);
- const responseHandler = (res) => {
- if (res.status() !== 200) {
- throw utils_1.customError('ResponseError')(`error navigating to user page, error was: ${res.status()} ${res.statusText()}`);
- }
- return res.json()
- .catch(redirectionHandler)
- .then((json) => {
- var _a;
- if (!json || !((_a = (json.graphql || json.data)) === null || _a === void 0 ? void 0 : _a.user)) {
- logger.warn('error parsing graphql response, returning empty object...');
- const data = { user: { edge_owner_to_timeline_media: { edges: [] } } };
- return { graphql: data, data };
- }
- return json;
- });
- };
- const jsonHandler = ({ user }) => {
- const pageInfo = user.edge_owner_to_timeline_media.page_info;
- for (const { node } of user.edge_owner_to_timeline_media.edges) {
- if (node.__typename === 'GraphVideo' && node.product_type === 'igtv')
- continue;
- if (node.id && utils_1.BigNumOps.compare(node.id, targetId) > 0)
- itemIds.push(node.id);
- else
- return itemIds;
- if (Number(targetId) < 1)
- return itemIds;
- }
- if (!(pageInfo === null || pageInfo === void 0 ? void 0 : pageInfo.has_next_page))
- return itemIds;
- logger.info('unable to find a smaller id than target, trying on next page...');
- url = graphqlLinkBuilder({ userId: user.id, after: pageInfo.end_cursor });
- const nextPageDelay = this.webshotDelay * (0.4 + Math.random() * 0.1);
- timeout += nextPageDelay;
- return util_1.promisify(setTimeout)(nextPageDelay)
- .then(() => page.goto(url, { waitUntil: 'load', timeout: getTimeout() }))
- .then(responseHandler)
- .then(({ data }) => jsonHandler(data));
- };
- return responseHandler(response)
- .then(({ graphql }) => jsonHandler(graphql));
- }).catch((err) => {
- if (err.name !== 'TimeoutError' && err.name !== 'ResponseError')
- throw err;
- if (err.name === 'ResponseError') {
- logger.warn(`error while fetching posts by @${userName}: ${err.message}`);
- }
- else
- logger.warn(`navigation timed out at ${getTimerTime()} ms`);
- return [];
- }).then(itemIds => util_1.promisify(setTimeout)(getTimeout()).then(() => itemIds.map(id => this.lazyGetMediaById(id))));
- }).finally(() => { page.close(); });
- });
- setTimeout(this.work, this.workInterval * 1000 / this.lock.feed.length);
- });
- };
- this.queryUser = (username) => this.client.user.searchExact(username)
- .catch((error) => {
- if (error instanceof instagram_private_api_1.IgLoginRequiredError) {
- logger.warn('login required, logging in again...');
- return this.session.login().then(() => this.client.user.searchExact(username));
- }
- else
- throw error;
- })
- .then(user => `${user.username}:${user.pk}`);
- this.workOnMedia = (lazyMediaItems, sendMedia) => this.webshot(lazyMediaItems, sendMedia, this.webshotDelay);
- this.urlSegmentToId = urlSegmentToId;
- this.lazyGetMediaById = (id) => ({
- pk: id,
- item: () => this.client.media.info(id).then(media => {
- const mediaItem = media.items[0];
- logger.debug(`api returned media post ${JSON.stringify(mediaItem)} for query id=${id}`);
- return mediaItem;
- }),
- });
- this.getMedia = (segmentId, sender) => this.workOnMedia([this.lazyGetMediaById(urlSegmentToId(segmentId))], sender);
- this.sendMedia = (source, ...to) => (msg, text, author) => {
- to.forEach(subscriber => {
- logger.info(`pushing data${source ? ` of ${source}` : ''} to ${JSON.stringify(subscriber)}`);
- retryOnError(() => this.bot.sendTo(subscriber, msg), (_, count, terminate) => {
- if (count <= maxTrials) {
- logger.warn(`retry sending to ${subscriber.chatID} for the ${ordinal(count)} time...`);
- }
- else {
- logger.warn(`${count - 1} consecutive failures while sending message chain, trying plain text instead...`);
- terminate(this.bot.sendTo(subscriber, author + text, true));
- }
- });
- });
- };
- this.work = () => {
- const lock = this.lock;
- if (this.workInterval < 1)
- this.workInterval = 1;
- if (this.isInactiveTime || lock.feed.length === 0) {
- setTimeout(this.work, this.workInterval * 1000 / lock.feed.length);
- return;
- }
- lock.feed.forEach((feed, index) => {
- if (!lock.threads[feed] ||
- !lock.threads[feed].subscribers ||
- lock.threads[feed].subscribers.length === 0) {
- logger.warn(`nobody subscribes thread ${feed}, removing from feed`);
- delete lock.threads[index];
- lock.feed.splice(index, 1);
- fs.writeFileSync(path.resolve(this.lockfile), JSON.stringify(lock));
- }
- });
- const queuedFeeds = lock.feed.slice(0, (lock.workon + 1) || undefined).reverse();
- utils_1.chainPromises(utils_1.Arr.chunk(queuedFeeds, 5).map((arr, i) => () => Promise.all(arr.map((currentFeed, j) => {
- const workon = (queuedFeeds.length - 1) - (i * 5 + j);
- fs.writeFileSync(path.resolve(this.lockfile), JSON.stringify(lock));
- const promiseDelay = this.workInterval * (Math.random() + j) * 250 / lock.feed.length;
- const startTime = new Date().getTime();
- const getTimerTime = () => new Date().getTime() - startTime;
- const promise = util_1.promisify(setTimeout)(promiseDelay * 3).then(() => {
- logger.info(`about to pull from feed #${workon}: ${currentFeed}`);
- if (j === arr.length - 1)
- logger.info(`timeout for this batch job: ${Math.trunc(promiseDelay)} ms`);
- const match = /https:\/\/www\.instagram\.com\/([^\/]+)/.exec(currentFeed);
- if (!match) {
- logger.error(`current feed "${currentFeed}" is invalid, please remove this feed manually`);
- return [];
- }
- return this.queryUserMedia(match[1], this.lock.threads[currentFeed].offset)
- .catch((error) => {
- logger.error(`error scraping media off profile page of ${match[1]}, error: ${error}`);
- return [];
- });
- }).then((mediaItems) => {
- const currentThread = lock.threads[currentFeed];
- const updateDate = () => currentThread.updatedAt = new Date().toString();
- if (!mediaItems || mediaItems.length === 0) {
- updateDate();
- return;
- }
- const topOfFeed = mediaItems[0].pk;
- const updateOffset = () => currentThread.offset = topOfFeed;
- if (currentThread.offset === '-1') {
- updateOffset();
- return;
- }
- return this.workOnMedia(mediaItems, this.sendMedia(`thread ${currentFeed}`, ...currentThread.subscribers))
- .then(updateDate).then(updateOffset);
- }).then(() => {
- lock.workon = workon - 1;
- if (j === arr.length - 1) {
- logger.info(`batch job #${workon}-${workon + j} completed after ${getTimerTime()} ms`);
- }
- fs.writeFileSync(path.resolve(this.lockfile), JSON.stringify(lock));
- });
- return Promise.race([promise, isWaitingForLogin ? utils_1.neverResolves() : util_1.promisify(setTimeout)(promiseDelay * 4)]);
- })))).then(this.work);
- };
- this.client = new instagram_private_api_1.IgApiClient();
- if (opt.proxyUrl) {
- try {
- const url = new URL(opt.proxyUrl);
- if (!/^socks(?:4a?|5h?)?:$/.test(url.protocol))
- throw Error();
- if (!url.port)
- url.port = '1080';
- this.client.request.defaults.agent = new socks_proxy_agent_1.SocksProxyAgent({
- hostname: url.hostname,
- port: url.port,
- userId: url.username,
- password: url.password,
- });
- }
- catch (e) {
- logger.warn(`invalid socks proxy url: ${opt.proxyUrl}, ignoring`);
- }
- }
- this.session = new SessionManager(this.client, opt.sessionLockfile, opt.credentials, opt.codeServicePort);
- this.lockfile = opt.lockfile;
- this.webshotCookiesLockfile = opt.webshotCookiesLockfile;
- this.lock = opt.lock;
- this.inactiveHours = opt.inactiveHours;
- this.workInterval = opt.workInterval;
- this.bot = opt.bot;
- this.webshotDelay = opt.webshotDelay;
- this.mode = opt.mode;
- this.wsUrl = opt.wsUrl;
- const cookiesFilePath = path.resolve(this.webshotCookiesLockfile);
- try {
- this.webshotCookies = JSON.parse(fs.readFileSync(cookiesFilePath, 'utf8'));
- logger.info(`loaded webshot cookies from file ${this.webshotCookiesLockfile}`);
- }
- catch (err) {
- logger.warn(`failed to load webshot cookies from file ${this.webshotCookiesLockfile}: `, err.message);
- logger.warn('cookies will be saved to this file when needed');
- }
- browserLogin = page => page.fill('input[name="username"]', opt.credentials[0], { timeout: 0 })
- .then(() => {
- if (isWaitingForLogin !== true)
- return;
- logger.warn('still waiting for login, pausing execution...');
- return utils_1.neverResolves();
- })
- .then(() => { isWaitingForLogin = true; logger.warn('blocked by login dialog, trying to log in manually...'); })
- .then(() => page.fill('input[name="password"]', opt.credentials[1], { timeout: 0 }))
- .then(() => page.click('button[type="submit"]', { timeout: 0 }))
- .then(() => (next => Promise.race([
- page.waitForSelector('#verificationCodeDescription', { timeout: 0 }).then(handle => handle.innerText()).then(text => {
- logger.info(`login is requesting two-factor authentication via ${/認証アプリ/.test(text) ? 'TOTP' : 'SMS'}`);
- return this.session.handle2FA(code => page.fill('input[name="verificationCode"]', code, { timeout: 0 }))
- .then(() => page.click('button:has-text("実行")', { timeout: 0 }))
- .then(next);
- }),
- next(),
- ]))(() => page.click('button:has-text("情報を保存")', { timeout: 0 }).then(() => { isWaitingForLogin = false; })));
- browserSaveCookies = page => page.context().cookies()
- .then(cookies => {
- this.webshotCookies = cookies;
- logger.info('successfully logged in, saving cookies to file...');
- fs.writeFileSync(path.resolve(this.webshotCookiesLockfile), JSON.stringify(cookies, null, 2), 'utf-8');
- });
- exports.WebshotHelpers.handleLogin = page => browserLogin(page)
- .then(() => page.waitForSelector('img[data-testid="user-avatar"]', { timeout: this.webshotDelay }))
- .then(() => browserSaveCookies(page))
- .catch((err) => {
- if (err.name === 'TimeoutError') {
- logger.warn('navigation timed out, assuming login has failed');
- isWaitingForLogin = false;
- }
- throw err;
- });
- ScreenNameNormalizer._queryUser = this.queryUser;
- const parseMediaError = (err) => {
- if (!(err instanceof instagram_private_api_1.IgResponseError && err.text === 'Media not found or unavailable')) {
- logger.warn(`error retrieving instagram media: ${err.message}`);
- return `获取媒体时出现错误:${err.message}`;
- }
- return '找不到请求的媒体,它可能已被删除。';
- };
- exports.getPostOwner = (segmentId) => this.client.media.info(urlSegmentToId(segmentId))
- .then(media => media.items[0].user)
- .then(user => `${user.username}:${user.pk}`)
- .catch((err) => { throw Error(parseMediaError(err)); });
- exports.sendPost = (segmentId, receiver) => {
- this.getMedia(segmentId, this.sendMedia(`instagram media ${segmentId}`, receiver))
- .catch((err) => { this.bot.sendTo(receiver, parseMediaError(err)); });
- };
- }
- get isInactiveTime() {
- const timeToEpoch = (h = 0, m = 0) => new Date().setHours(h, m, 0, 0);
- return this.inactiveHours
- .map(rangeStr => ((start, end) => ({ start, end }))(...rangeStr.split('-', 2).map(timeStr => timeToEpoch(...timeStr.split(':', 2).map(Number)))))
- .some(range => (now => now >= range.start && now < range.end)(Date.now()));
- }
- }
- exports.default = default_1;
|