Browse Source

revert to user feed api

Mike L 3 years ago
parent
commit
d28d004145
2 changed files with 54 additions and 162 deletions
  1. 28 78
      dist/twitter.js
  2. 26 84
      src/twitter.ts

+ 28 - 78
dist/twitter.js

@@ -192,80 +192,7 @@ class default_1 {
     constructor(opt) {
         this.webshotCookies = [];
         this.launch = () => {
-            this.webshot = new webshot_1.default(this.wsUrl, this.mode, () => this.webshotCookies, doOnNewPage => {
-                this.queryUserMedia = ((userName, targetId) => {
-                    let page;
-                    const url = linkBuilder({ userName });
-                    logger.debug(`pulling ${targetId !== '0' ? `feed ${url} up to ${targetId}` : `top of feed ${url}`}...`);
-                    return doOnNewPage(newPage => {
-                        page = newPage;
-                        let timeout = this.workInterval * 1000;
-                        const startTime = new Date().getTime();
-                        const getTimerTime = () => new Date().getTime() - startTime;
-                        const getTimeout = () => isWaitingForLogin ? 0 : Math.max(90000, timeout - getTimerTime());
-                        return page.context().addCookies(this.webshotCookies)
-                            .then(() => page.goto(url, { waitUntil: 'load', timeout: getTimeout() }))
-                            .then(response => {
-                            if (response.status() !== 200) {
-                                const err = new Error(`error navigating to user page, error was: ${response.status()} ${response.statusText()}`);
-                                throw Object.defineProperty(err, 'name', {
-                                    value: 'ResponseError',
-                                });
-                            }
-                        }).then(() => acceptCookieConsent(page))
-                            .then(() => (next => Promise.race([
-                            browserLogin(page)
-                                .catch((err) => {
-                                if (err.name === 'TimeoutError') {
-                                    logger.warn('navigation timed out, assuming login has failed');
-                                    isWaitingForLogin = false;
-                                }
-                                throw err;
-                            })
-                                .then(() => browserSaveCookies(page))
-                                .then(() => page.goto(url)).then(next),
-                            next(),
-                        ]))(() => util_1.promisify(setTimeout)(2000).then(() => page.waitForSelector('article', { timeout: getTimeout() })))).then(handle => {
-                            const postHandler = () => {
-                                const toId = (href) => { var _a; return instagram_id_to_url_segment_1.urlSegmentToInstagramId(((_a = /\/p\/(.*)\/$/.exec(href)) !== null && _a !== void 0 ? _a : [, ''])[1]); };
-                                if (targetId === '0') {
-                                    return handle.$$eval('a', as => as.filter(a => !a.querySelector('[aria-label="IGTV"]'))[0].href).then(href => Number(toId(href)) > 0 ? [toId(href)] : []);
-                                }
-                                return handle.$$eval('a', as => as.filter(a => !a.querySelector('[aria-label="IGTV"]')).map(a => a.href)).then(hrefs => {
-                                    let id;
-                                    const itemIds = [];
-                                    for (const href of hrefs) {
-                                        id = toId(href);
-                                        if (id && utils_1.BigNumOps.compare(id, targetId) > 0)
-                                            itemIds.push(id);
-                                        else
-                                            return itemIds;
-                                    }
-                                    logger.info('unable to find a smaller id than target, trying on next page...');
-                                    return null;
-                                });
-                            };
-                            return postHandler().then(itemIds => {
-                                if (itemIds)
-                                    return itemIds;
-                                timeout += this.workInterval * 500;
-                                return handle.$$('a')
-                                    .then(as => { as.pop().scrollIntoViewIfNeeded(); return as.length + 1; })
-                                    .then(loadedCount => page.waitForFunction(count => document.querySelectorAll('article a').length > count, loadedCount))
-                                    .then(postHandler);
-                            });
-                        }).catch((err) => {
-                            if (err.name !== 'TimeoutError' && err.name !== 'ResponseError')
-                                throw err;
-                            if (err.name === 'ResponseError') {
-                                logger.warn(`error while fetching tweets for ${userName}: ${err.message}`);
-                            }
-                            else
-                                logger.warn(`navigation timed out at ${getTimerTime()} ms`);
-                            return [];
-                        }).then(itemIds => util_1.promisify(setTimeout)(getTimeout()).then(() => itemIds.map(id => this.lazyGetMediaById(id))));
-                    }).finally(() => { page.close(); });
-                });
+            this.webshot = new webshot_1.default(this.wsUrl, this.mode, () => this.webshotCookies, () => {
                 setTimeout(this.work, this.workInterval * 1000 / this.lock.feed.length);
             });
         };
@@ -325,11 +252,34 @@ class default_1 {
                         logger.error(`current feed "${currentFeed}" is invalid, please remove this feed manually`);
                         return [];
                     }
-                    return this.queryUserMedia(match[1], this.lock.threads[currentFeed].offset)
-                        .catch((error) => {
-                        logger.error(`error scraping media off profile page of ${match[1]}, error: ${error}`);
-                        return [];
+                    const feed = this.client.feed.user(lock.threads[currentFeed].id);
+                    const newer = (item) => utils_1.BigNumOps.compare(item.pk, lock.threads[currentFeed].offset) > 0;
+                    const fetchMore = () => new Promise(fetch => {
+                        feed.request().then(response => {
+                            if (response.items.length === 0)
+                                return fetch([]);
+                            if (response.items.every(newer)) {
+                                fetchMore().then(fetched => fetch(response.items.concat(fetched)));
+                            }
+                            else
+                                fetch(response.items.filter(newer));
+                        }, (error) => {
+                            if (error instanceof instagram_private_api_1.IgNetworkError) {
+                                logger.warn(`error on fetching media for ${currentFeed}: ${JSON.stringify(error.cause)}`);
+                                if (!(error instanceof instagram_private_api_1.IgNotFoundError))
+                                    return;
+                                lock.threads[currentFeed].subscribers.forEach(subscriber => {
+                                    logger.info(`sending notfound message of ${currentFeed} to ${JSON.stringify(subscriber)}`);
+                                    this.bot.sendTo(subscriber, `链接 ${currentFeed} 指向的用户或列表不存在,请退订。`).catch();
+                                });
+                            }
+                            else {
+                                logger.error(`unhandled error on fetching media for ${currentFeed}: ${JSON.stringify(error)}`);
+                            }
+                            fetch([]);
+                        });
                     });
+                    return fetchMore().then(items => items.map(item => ({ pk: item.pk, item: () => Promise.resolve(item) })));
                 });
                 promise.then((mediaItems) => {
                     const currentThread = lock.threads[currentFeed];

+ 26 - 84
src/twitter.ts

@@ -11,9 +11,10 @@ import {
 } from 'instagram-id-to-url-segment';
 import {
   IgApiClient,
-  IgClientError, IgExactUserNotFoundError, IgLoginTwoFactorRequiredError, IgResponseError,
+  IgClientError, IgExactUserNotFoundError, IgLoginTwoFactorRequiredError, IgNetworkError, IgNotFoundError, IgResponseError,
   MediaInfoResponseItemsItem, UserFeedResponseItemsItem
 } from 'instagram-private-api';
+import { RequestError } from 'request-promise/errors';
 import { SocksProxyAgent } from 'socks-proxy-agent';
 
 import { getLogger } from './loggers';
@@ -341,90 +342,12 @@ export default class {
       this.wsUrl,
       this.mode,
       () => this.webshotCookies,
-      doOnNewPage => {
-        this.queryUserMedia = ((userName, targetId) => {
-          let page: Page;
-          const url = linkBuilder({ userName });
-          logger.debug(`pulling ${targetId !== '0' ? `feed ${url} up to ${targetId}` : `top of feed ${url}`}...`);
-          return doOnNewPage(newPage => {
-            page = newPage;
-            let timeout = this.workInterval * 1000;
-            const startTime = new Date().getTime();
-            const getTimerTime = () => new Date().getTime() - startTime;
-            const getTimeout = () => isWaitingForLogin ? 0 : Math.max(90000, timeout - getTimerTime());
-            return page.context().addCookies(this.webshotCookies)
-              .then(() => page.goto(url, { waitUntil: 'load', timeout: getTimeout() }))
-              .then(response => {
-                if (response.status() !== 200) {
-                  const err = new Error(
-                    `error navigating to user page, error was: ${response.status()} ${response.statusText()}`
-                  );
-                  throw Object.defineProperty(err, 'name', {
-                    value: 'ResponseError',
-                  });
-                }
-              }).then(() => acceptCookieConsent(page))
-              .then(() =>
-                (next => Promise.race([
-                  browserLogin(page)
-                    .catch((err: Error) => {
-                      if (err.name === 'TimeoutError') { logger.warn('navigation timed out, assuming login has failed'); isWaitingForLogin = false; }
-                      throw err;
-                    })
-                    .then(() => browserSaveCookies(page))
-                    .then(() => page.goto(url)).then(next),
-                  next(),
-                ]))(() => promisify(setTimeout)(2000).then(() => page.waitForSelector('article', {timeout: getTimeout()})))
-              ).then(handle => {
-                const postHandler = () => {
-                  const toId = (href: string) => urlSegmentToId((/\/p\/(.*)\/$/.exec(href) ?? [,''])[1]);
-                  if (targetId === '0') {
-                    return handle.$$eval('a', as =>
-                      as.filter(a => !a.querySelector('[aria-label="IGTV"]'))[0].href
-                    ).then(href => Number(toId(href)) > 0 ? [toId(href)] : []);
-                  }
-                  return handle.$$eval('a', as =>
-                    as.filter(a => !a.querySelector('[aria-label="IGTV"]')).map(a => a.href)
-                  ).then(hrefs => {
-                    let id: string;
-                    const itemIds: string[] = [];
-                    for (const href of hrefs) {
-                      id = toId(href);
-                      if (id && BigNumOps.compare(id, targetId) > 0) itemIds.push(id);
-                      else return itemIds;
-                    }
-                    logger.info('unable to find a smaller id than target, trying on next page...');
-                    return null; // has more
-                  });
-                };
-                return postHandler().then(itemIds => {
-                  if (itemIds) return itemIds;
-                  timeout += this.workInterval * 500;
-                  return handle.$$('a')
-                    .then(as => { as.pop().scrollIntoViewIfNeeded(); return as.length + 1; })
-                    .then(loadedCount => page.waitForFunction(count =>
-                      document.querySelectorAll('article a').length > count
-                    , loadedCount))
-                    .then(postHandler);
-                });
-              }).catch((err: Error) => {
-                if (err.name !== 'TimeoutError' && err.name !== 'ResponseError') throw err;
-                if (err.name === 'ResponseError') {
-                  logger.warn(`error while fetching tweets for ${userName}: ${err.message}`);
-                } else logger.warn(`navigation timed out at ${getTimerTime()} ms`);
-                return [] as string[];
-              }).then(itemIds => promisify(setTimeout)(getTimeout()).then(() =>
-                itemIds.map(id => this.lazyGetMediaById(id))
-              ));
-          }).finally(() => { page.close(); });
-        });
+      () => {
         setTimeout(this.work, this.workInterval * 1000 / this.lock.feed.length);
       }
     );
   };
 
-  public queryUserMedia: (username: string, targetId?: string) => Promise<LazyMediaItem[]>;
-
   public queryUser = (username: string) => this.client.user.searchExact(username)
     .then(user => `${user.username}:${user.pk}`);
 
@@ -504,11 +427,30 @@ export default class {
             logger.error(`current feed "${currentFeed}" is invalid, please remove this feed manually`);
             return [] as LazyMediaItem[];
           }
-          return this.queryUserMedia(match[1], this.lock.threads[currentFeed].offset)
-            .catch((error: Error) => {
-              logger.error(`error scraping media off profile page of ${match[1]}, error: ${error}`);
-              return [] as LazyMediaItem[];
+          const feed = this.client.feed.user(lock.threads[currentFeed].id);
+          const newer = (item: UserFeedResponseItemsItem) =>
+            BigNumOps.compare(item.pk, lock.threads[currentFeed].offset) > 0;
+          const fetchMore = () => new Promise<UserFeedResponseItemsItem[]>(fetch => {
+            feed.request().then(response => {
+              if (response.items.length === 0) return fetch([]);
+              if (response.items.every(newer)) {
+                fetchMore().then(fetched => fetch(response.items.concat(fetched)));
+              } else fetch(response.items.filter(newer));
+            }, (error: IgClientError & Partial<RequestError>) => {
+              if (error instanceof IgNetworkError) {
+                logger.warn(`error on fetching media for ${currentFeed}: ${JSON.stringify(error.cause)}`);
+                if (!(error instanceof IgNotFoundError)) return;
+                lock.threads[currentFeed].subscribers.forEach(subscriber => {
+                  logger.info(`sending notfound message of ${currentFeed} to ${JSON.stringify(subscriber)}`);
+                  this.bot.sendTo(subscriber, `链接 ${currentFeed} 指向的用户或列表不存在,请退订。`).catch();
+                });
+              } else {
+                logger.error(`unhandled error on fetching media for ${currentFeed}: ${JSON.stringify(error)}`);
+              }
+              fetch([]);
             });
+          });
+          return fetchMore().then(items => items.map(item => ({pk: item.pk, item: () => Promise.resolve(item)})));
         });
 
         promise.then((mediaItems: LazyMediaItem[]) => {