|
@@ -11,9 +11,10 @@ import {
|
|
|
} from 'instagram-id-to-url-segment';
|
|
|
import {
|
|
|
IgApiClient,
|
|
|
- IgClientError, IgExactUserNotFoundError, IgLoginTwoFactorRequiredError, IgResponseError,
|
|
|
+ IgClientError, IgExactUserNotFoundError, IgLoginTwoFactorRequiredError, IgNetworkError, IgNotFoundError, IgResponseError,
|
|
|
MediaInfoResponseItemsItem, UserFeedResponseItemsItem
|
|
|
} from 'instagram-private-api';
|
|
|
+import { RequestError } from 'request-promise/errors';
|
|
|
import { SocksProxyAgent } from 'socks-proxy-agent';
|
|
|
|
|
|
import { getLogger } from './loggers';
|
|
@@ -341,90 +342,12 @@ export default class {
|
|
|
this.wsUrl,
|
|
|
this.mode,
|
|
|
() => this.webshotCookies,
|
|
|
- doOnNewPage => {
|
|
|
- this.queryUserMedia = ((userName, targetId) => {
|
|
|
- let page: Page;
|
|
|
- const url = linkBuilder({ userName });
|
|
|
- logger.debug(`pulling ${targetId !== '0' ? `feed ${url} up to ${targetId}` : `top of feed ${url}`}...`);
|
|
|
- return doOnNewPage(newPage => {
|
|
|
- page = newPage;
|
|
|
- let timeout = this.workInterval * 1000;
|
|
|
- const startTime = new Date().getTime();
|
|
|
- const getTimerTime = () => new Date().getTime() - startTime;
|
|
|
- const getTimeout = () => isWaitingForLogin ? 0 : Math.max(90000, timeout - getTimerTime());
|
|
|
- return page.context().addCookies(this.webshotCookies)
|
|
|
- .then(() => page.goto(url, { waitUntil: 'load', timeout: getTimeout() }))
|
|
|
- .then(response => {
|
|
|
- if (response.status() !== 200) {
|
|
|
- const err = new Error(
|
|
|
- `error navigating to user page, error was: ${response.status()} ${response.statusText()}`
|
|
|
- );
|
|
|
- throw Object.defineProperty(err, 'name', {
|
|
|
- value: 'ResponseError',
|
|
|
- });
|
|
|
- }
|
|
|
- }).then(() => acceptCookieConsent(page))
|
|
|
- .then(() =>
|
|
|
- (next => Promise.race([
|
|
|
- browserLogin(page)
|
|
|
- .catch((err: Error) => {
|
|
|
- if (err.name === 'TimeoutError') { logger.warn('navigation timed out, assuming login has failed'); isWaitingForLogin = false; }
|
|
|
- throw err;
|
|
|
- })
|
|
|
- .then(() => browserSaveCookies(page))
|
|
|
- .then(() => page.goto(url)).then(next),
|
|
|
- next(),
|
|
|
- ]))(() => promisify(setTimeout)(2000).then(() => page.waitForSelector('article', {timeout: getTimeout()})))
|
|
|
- ).then(handle => {
|
|
|
- const postHandler = () => {
|
|
|
- const toId = (href: string) => urlSegmentToId((/\/p\/(.*)\/$/.exec(href) ?? [,''])[1]);
|
|
|
- if (targetId === '0') {
|
|
|
- return handle.$$eval('a', as =>
|
|
|
- as.filter(a => !a.querySelector('[aria-label="IGTV"]'))[0].href
|
|
|
- ).then(href => Number(toId(href)) > 0 ? [toId(href)] : []);
|
|
|
- }
|
|
|
- return handle.$$eval('a', as =>
|
|
|
- as.filter(a => !a.querySelector('[aria-label="IGTV"]')).map(a => a.href)
|
|
|
- ).then(hrefs => {
|
|
|
- let id: string;
|
|
|
- const itemIds: string[] = [];
|
|
|
- for (const href of hrefs) {
|
|
|
- id = toId(href);
|
|
|
- if (id && BigNumOps.compare(id, targetId) > 0) itemIds.push(id);
|
|
|
- else return itemIds;
|
|
|
- }
|
|
|
- logger.info('unable to find a smaller id than target, trying on next page...');
|
|
|
- return null; // has more
|
|
|
- });
|
|
|
- };
|
|
|
- return postHandler().then(itemIds => {
|
|
|
- if (itemIds) return itemIds;
|
|
|
- timeout += this.workInterval * 500;
|
|
|
- return handle.$$('a')
|
|
|
- .then(as => { as.pop().scrollIntoViewIfNeeded(); return as.length + 1; })
|
|
|
- .then(loadedCount => page.waitForFunction(count =>
|
|
|
- document.querySelectorAll('article a').length > count
|
|
|
- , loadedCount))
|
|
|
- .then(postHandler);
|
|
|
- });
|
|
|
- }).catch((err: Error) => {
|
|
|
- if (err.name !== 'TimeoutError' && err.name !== 'ResponseError') throw err;
|
|
|
- if (err.name === 'ResponseError') {
|
|
|
- logger.warn(`error while fetching tweets for ${userName}: ${err.message}`);
|
|
|
- } else logger.warn(`navigation timed out at ${getTimerTime()} ms`);
|
|
|
- return [] as string[];
|
|
|
- }).then(itemIds => promisify(setTimeout)(getTimeout()).then(() =>
|
|
|
- itemIds.map(id => this.lazyGetMediaById(id))
|
|
|
- ));
|
|
|
- }).finally(() => { page.close(); });
|
|
|
- });
|
|
|
+ () => {
|
|
|
setTimeout(this.work, this.workInterval * 1000 / this.lock.feed.length);
|
|
|
}
|
|
|
);
|
|
|
};
|
|
|
|
|
|
- public queryUserMedia: (username: string, targetId?: string) => Promise<LazyMediaItem[]>;
|
|
|
-
|
|
|
public queryUser = (username: string) => this.client.user.searchExact(username)
|
|
|
.then(user => `${user.username}:${user.pk}`);
|
|
|
|
|
@@ -504,11 +427,30 @@ export default class {
|
|
|
logger.error(`current feed "${currentFeed}" is invalid, please remove this feed manually`);
|
|
|
return [] as LazyMediaItem[];
|
|
|
}
|
|
|
- return this.queryUserMedia(match[1], this.lock.threads[currentFeed].offset)
|
|
|
- .catch((error: Error) => {
|
|
|
- logger.error(`error scraping media off profile page of ${match[1]}, error: ${error}`);
|
|
|
- return [] as LazyMediaItem[];
|
|
|
+ const feed = this.client.feed.user(lock.threads[currentFeed].id);
|
|
|
+ // Predicate for feed items that still have to be delivered: the item's pk is compared with
+ // the offset stored for the current feed, and a positive comparison result counts as newer.
+ const newer = ({ pk }: UserFeedResponseItemsItem) => {
+   const { offset } = lock.threads[currentFeed];
+   return BigNumOps.compare(pk, offset) > 0;
+ };
|
|
|
+ // Requests pages of the user feed and resolves with every item accepted by `newer`.
+ // Another page is requested only while the whole current page is newer; an empty page or a
+ // page containing an older item ends the walk.
+ // Request failures are logged and turned into an empty page, so the returned promise always
+ // settles and items collected from earlier pages are still delivered.
+ // NOTE(review): if the stored offset is the '0' placeholder, every item presumably compares
+ // as newer and this walks the entire feed - confirm that is intended.
+ const fetchMore = (): Promise<UserFeedResponseItemsItem[]> =>
+   feed.request().then<UserFeedResponseItemsItem[]>(response => {
+     if (response.items.length === 0) return [];
+     // a page holding an older item marks the end of the unseen part of the feed
+     if (!response.items.every(newer)) return response.items.filter(newer);
+     return fetchMore().then(fetched => response.items.concat(fetched));
+   }).catch((error: IgClientError & Partial<RequestError>) => {
+     if (error instanceof IgNotFoundError) {
+       // Checked on its own rather than inside the IgNetworkError branch, so the notice is
+       // sent regardless of which base class the library gives IgNotFoundError.
+       logger.warn(`error on fetching media for ${currentFeed}: ${JSON.stringify(error.cause ?? error.message)}`);
+       lock.threads[currentFeed].subscribers.forEach(subscriber => {
+         logger.info(`sending notfound message of ${currentFeed} to ${JSON.stringify(subscriber)}`);
+         // best effort: a failed notification is logged instead of becoming an unhandled rejection
+         this.bot.sendTo(subscriber, `链接 ${currentFeed} 指向的用户或列表不存在,请退订。`).catch((err: Error) => {
+           logger.error(`error sending notfound message of ${currentFeed} to ${JSON.stringify(subscriber)}: ${err}`);
+         });
+       });
+     } else if (error instanceof IgNetworkError) {
+       logger.warn(`error on fetching media for ${currentFeed}: ${JSON.stringify(error.cause)}`);
+     } else {
+       logger.error(`unhandled error on fetching media for ${currentFeed}: ${JSON.stringify(error)}`);
+     }
+     // every failure path resolves with an empty page; skipping this would leave the caller waiting forever
+     return [];
+   });
|
|
|
+ return fetchMore().then(items => items.map(item => ({pk: item.pk, item: () => Promise.resolve(item)})));
|
|
|
});
|
|
|
|
|
|
promise.then((mediaItems: LazyMediaItem[]) => {
|