@@ -11,9 +11,10 @@ import {
} from 'instagram-id-to-url-segment';
import {
- IgClientError, IgExactUserNotFoundError, IgLoginTwoFactorRequiredError, IgResponseError,
+ IgClientError, IgExactUserNotFoundError, IgLoginTwoFactorRequiredError, IgNetworkError, IgNotFoundError, IgResponseError,
MediaInfoResponseItemsItem, UserFeedResponseItemsItem
} from 'instagram-private-api';
+import { RequestError } from 'request-promise/errors';
import { SocksProxyAgent } from 'socks-proxy-agent';
import { getLogger } from './loggers';
@@ -341,90 +342,12 @@ export default class {
() => this.webshotCookies,
- doOnNewPage => {
- this.queryUserMedia = ((userName, targetId) => {
- let page: Page;
- const url = linkBuilder({ userName });
- logger.debug(`pulling ${targetId !== '0' ? `feed ${url} up to ${targetId}` : `top of feed ${url}`}...`);
- return doOnNewPage(newPage => {
- page = newPage;
- let timeout = this.workInterval * 1000;
- const startTime = new Date().getTime();
- const getTimerTime = () => new Date().getTime() - startTime;
- const getTimeout = () => isWaitingForLogin ? 0 : Math.max(90000, timeout - getTimerTime());
- return page.context().addCookies(this.webshotCookies)
- .then(() => page.goto(url, { waitUntil: 'load', timeout: getTimeout() }))
- .then(response => {
- if (response.status() !== 200) {
- const err = new Error(
- `error navigating to user page, error was: ${response.status()} ${response.statusText()}`
- );
- throw Object.defineProperty(err, 'name', {
- value: 'ResponseError',
- });
- }
- }).then(() => acceptCookieConsent(page))
- .then(() =>
- (next => Promise.race([
- browserLogin(page)
- .catch((err: Error) => {
- if (err.name === 'TimeoutError') { logger.warn('navigation timed out, assuming login has failed'); isWaitingForLogin = false; }
- throw err;
- })
- .then(() => browserSaveCookies(page))
- .then(() => page.goto(url)).then(next),
- next(),
- ]))(() => promisify(setTimeout)(2000).then(() => page.waitForSelector('article', {timeout: getTimeout()})))
- ).then(handle => {
- const postHandler = () => {
- const toId = (href: string) => urlSegmentToId((/\/p\/(.*)\/$/.exec(href) ?? [,''])[1]);
- if (targetId === '0') {
- return handle.$$eval('a', as =>
- as.filter(a => !a.querySelector('[aria-label="IGTV"]'))[0].href
- ).then(href => Number(toId(href)) > 0 ? [toId(href)] : []);
- }
- return handle.$$eval('a', as =>
- as.filter(a => !a.querySelector('[aria-label="IGTV"]')).map(a => a.href)
- ).then(hrefs => {
- let id: string;
- const itemIds: string[] = [];
- for (const href of hrefs) {
- id = toId(href);
- if (id && BigNumOps.compare(id, targetId) > 0) itemIds.push(id);
- else return itemIds;
- }
- logger.info('unable to find a smaller id than target, trying on next page...');
- return null; // has more
- });
- };
- return postHandler().then(itemIds => {
- if (itemIds) return itemIds;
- timeout += this.workInterval * 500;
- return handle.$$('a')
- .then(as => { as.pop().scrollIntoViewIfNeeded(); return as.length + 1; })
- .then(loadedCount => page.waitForFunction(count =>
- document.querySelectorAll('article a').length > count
- , loadedCount))
- .then(postHandler);
- });
- }).catch((err: Error) => {
- if (err.name !== 'TimeoutError' && err.name !== 'ResponseError') throw err;
- if (err.name === 'ResponseError') {
- logger.warn(`error while fetching tweets for ${userName}: ${err.message}`);
- } else logger.warn(`navigation timed out at ${getTimerTime()} ms`);
- return [] as string[];
- }).then(itemIds => promisify(setTimeout)(getTimeout()).then(() =>
- itemIds.map(id => this.lazyGetMediaById(id))
- ));
- }).finally(() => { page.close(); });
- });
+ () => {
setTimeout(this.work, this.workInterval * 1000 / this.lock.feed.length);
- public queryUserMedia: (username: string, targetId?: string) => Promise<LazyMediaItem[]>;
public queryUser = (username: string) => this.client.user.searchExact(username)
.then(user => `${user.username}:${user.pk}`);
@@ -504,11 +427,30 @@ export default class {
logger.error(`current feed "${currentFeed}" is invalid, please remove this feed manually`);
return [] as LazyMediaItem[];
- return this.queryUserMedia(match[1], this.lock.threads[currentFeed].offset)
- .catch((error: Error) => {
- logger.error(`error scraping media off profile page of ${match[1]}, error: ${error}`);
- return [] as LazyMediaItem[];
+ const feed = this.client.feed.user(lock.threads[currentFeed].id); // user feed for the id stored on this thread
+ const newer = (item: UserFeedResponseItemsItem) => // true when the item's pk compares greater than the thread's stored offset
+ BigNumOps.compare(item.pk, lock.threads[currentFeed].offset) > 0;
+ // Requests feed pages one after another and resolves with the items that `newer`
+ // accepts. Keeps requesting while every item of a page is newer than the offset;
+ // stops at the first page that is empty or contains an item that is not newer.
+ // The promise never rejects: every failure path resolves with an empty list so
+ // that whatever is chained on `fetchMore()` always gets to run.
+ const fetchMore = () => new Promise<UserFeedResponseItemsItem[]>(fetch => {
+ feed.request().then(response => {
+ if (response.items.length === 0) return fetch([]);
+ if (response.items.every(newer)) {
+ // the whole page is newer than the offset, so request another page and append it
+ fetchMore().then(fetched => fetch(response.items.concat(fetched)));
+ } else fetch(response.items.filter(newer));
+ }, (error: IgClientError & Partial<RequestError>) => {
+ if (error instanceof IgNotFoundError) {
+ // tested on its own (not nested under IgNetworkError) so the notice is sent
+ // regardless of which base class the not-found error derives from
+ logger.warn(`error on fetching media for ${currentFeed}: ${error.message}`);
+ lock.threads[currentFeed].subscribers.forEach(subscriber => {
+ logger.info(`sending notfound message of ${currentFeed} to ${JSON.stringify(subscriber)}`);
+ this.bot.sendTo(subscriber, `链接 ${currentFeed} 指向的用户或列表不存在,请退订。`)
+ .catch((err: Error) => logger.error(`error sending notfound message of ${currentFeed}: ${err}`));
+ });
+ } else if (error instanceof IgNetworkError) {
+ logger.warn(`error on fetching media for ${currentFeed}: ${JSON.stringify(error.cause)}`);
+ } else {
+ logger.error(`unhandled error on fetching media for ${currentFeed}: ${JSON.stringify(error)}`);
+ }
+ // always settle: returning early here without resolving left the promise pending forever
+ fetch([]);
+ });
+ return fetchMore().then(items => items.map(item => ({pk: item.pk, item: () => Promise.resolve(item)}))); // wrap each fetched item as {pk, item}; item() resolves to the already-fetched item
promise.then((mediaItems: LazyMediaItem[]) => {