Ver código fonte

Attempt to use Playwright to circumvent HTTP 429 (Too Many Requests) responses

Mike L 4 anos atrás
pai
commit
c53a594fae
4 arquivos alterados com 315 adições e 158 exclusões
  1. 113 57
      dist/twitter.js
  2. 32 24
      dist/webshot.js
  3. 126 49
      src/twitter.ts
  4. 44 28
      src/webshot.ts

+ 113 - 57
dist/twitter.js

@@ -9,7 +9,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
     });
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.sendPost = exports.getPostOwner = exports.browserLogin = exports.ScreenNameNormalizer = exports.SessionManager = exports.urlSegmentToId = exports.idToUrlSegment = exports.isValidUrlSegment = exports.parseLink = exports.linkBuilder = void 0;
+exports.sendPost = exports.getPostOwner = exports.WebshotHelpers = exports.ScreenNameNormalizer = exports.SessionManager = exports.urlSegmentToId = exports.idToUrlSegment = exports.isValidUrlSegment = exports.parseLink = exports.linkBuilder = void 0;
 const fs = require("fs");
 const path = require("path");
 const instagram_id_to_url_segment_1 = require("instagram-id-to-url-segment");
@@ -99,7 +99,15 @@ class ScreenNameNormalizer {
 exports.ScreenNameNormalizer = ScreenNameNormalizer;
 ScreenNameNormalizer.normalize = (username) => `${username.toLowerCase().replace(/^@/, '')}:`;
 let browserLogin = (page) => Promise.reject();
-exports.browserLogin = browserLogin;
+let browserSaveCookies = browserLogin;
+const acceptCookieConsent = (page) => page.click('button:has-text("すべて許可")', { timeout: 5000 })
+    .then(() => logger.info('accepted cookie consent'))
+    .catch((err) => { if (err.name !== 'TimeoutError')
+    throw err; });
+exports.WebshotHelpers = {
+    handleLogin: browserLogin,
+    handleCookieConsent: acceptCookieConsent,
+};
 let getPostOwner = (segmentId) => Promise.reject();
 exports.getPostOwner = getPostOwner;
 let sendPost = (segmentId, receiver) => {
@@ -136,18 +144,93 @@ class default_1 {
     constructor(opt) {
         this.webshotCookies = [];
         this.launch = () => {
-            this.webshot = new webshot_1.default(this.wsUrl, this.mode, () => this.webshotCookies, () => setTimeout(this.work, this.workInterval * 1000));
+            this.webshot = new webshot_1.default(this.wsUrl, this.mode, () => this.webshotCookies, doOnNewPage => {
+                this.queryUserMedia = ((userName, targetId) => {
+                    let page;
+                    const url = linkBuilder({ userName });
+                    logger.debug(`pulling ${targetId !== '0' ? `feed ${url} up to ${targetId}` : `top of feed ${url}`}...`);
+                    return doOnNewPage(newPage => {
+                        page = newPage;
+                        let timeout = this.webshotDelay;
+                        const startTime = new Date().getTime();
+                        const getTimerTime = () => new Date().getTime() - startTime;
+                        const getTimeout = () => Math.max(500, timeout - getTimerTime());
+                        return page.context().addCookies(this.webshotCookies)
+                            .then(() => page.goto(url, { waitUntil: 'load', timeout: getTimeout() }))
+                            .then(response => {
+                            if (response.status() !== 200) {
+                                const err = new Error(`error navigating to user page, error was: ${response.status()} ${response.statusText()}`);
+                                throw Object.defineProperty(err, 'name', {
+                                    value: 'ResponseError',
+                                });
+                            }
+                        }).then(() => acceptCookieConsent(page))
+                            .then(() => (next => Promise.race([
+                            browserLogin(page)
+                                .catch((err) => {
+                                if (err.name === 'TimeoutError')
+                                    logger.warn('navigation timed out, assuming login has failed');
+                                throw err;
+                            })
+                                .then(() => browserSaveCookies(page))
+                                .then(() => page.goto(url)).then(next),
+                            next(),
+                        ]))(() => page.waitForSelector('article', { timeout: getTimeout() }))).then(handle => {
+                            const postHandler = () => {
+                                const toId = (href) => { var _a; return instagram_id_to_url_segment_1.urlSegmentToInstagramId(((_a = /\/p\/(.*)\/$/.exec(href)) !== null && _a !== void 0 ? _a : [])[1]); };
+                                if (targetId === '0') {
+                                    return handle.$$eval('a', as => as.filter(a => !a.querySelector('[aria-label="IGTV"]'))[0].href).then(href => href ? [toId(href)] : null);
+                                }
+                                return handle.$$eval('a', as => as.filter(a => !a.querySelector('[aria-label="IGTV"]')).map(a => a.href)).then(hrefs => {
+                                    let id;
+                                    const itemIds = [];
+                                    for (const href of hrefs) {
+                                        id = toId(href);
+                                        if (id && utils_1.BigNumOps.compare(id, targetId) > 0)
+                                            itemIds.push(id);
+                                        else
+                                            return itemIds;
+                                    }
+                                    return null;
+                                });
+                            };
+                            return postHandler().then(itemIds => {
+                                if (itemIds)
+                                    return itemIds;
+                                timeout += this.webshotDelay / 2;
+                                return handle.$$('a')
+                                    .then(as => { as.pop().scrollIntoViewIfNeeded(); return as.length + 1; })
+                                    .then(loadedCount => page.waitForFunction(count => document.querySelectorAll('article a').length > count, loadedCount))
+                                    .then(postHandler);
+                            });
+                        }).catch((err) => {
+                            if (err.name !== 'TimeoutError' && err.name !== 'ResponseError')
+                                throw err;
+                            if (err.name === 'ResponseError') {
+                                logger.warn(`error while fetching tweets for ${userName}: ${err.message}`);
+                            }
+                            else
+                                logger.warn(`navigation timed out at ${getTimerTime()} ms`);
+                            return [];
+                        }).then(itemIds => itemIds.map(id => this.lazyGetMediaById(id)));
+                    }).finally(() => { page.close(); });
+                });
+                setTimeout(this.work, this.workInterval * 1000);
+            });
         };
         this.queryUser = (username) => this.client.user.searchExact(username)
             .then(user => `${user.username}:${user.pk}`);
-        this.workOnMedia = (mediaItems, sendMedia) => this.webshot(mediaItems, sendMedia, this.webshotDelay);
+        this.workOnMedia = (lazyMediaItems, sendMedia) => this.webshot(lazyMediaItems, sendMedia, this.webshotDelay);
         this.urlSegmentToId = instagram_id_to_url_segment_1.urlSegmentToInstagramId;
-        this.getMedia = (segmentId, sender) => this.client.media.info(instagram_id_to_url_segment_1.urlSegmentToInstagramId(segmentId))
-            .then(media => {
-            const mediaItem = media.items[0];
-            logger.debug(`api returned media post ${JSON.stringify(mediaItem)} for query id=${segmentId}`);
-            return this.workOnMedia([mediaItem], sender);
+        this.lazyGetMediaById = (id) => ({
+            pk: id,
+            item: () => this.client.media.info(id).then(media => {
+                const mediaItem = media.items[0];
+                logger.debug(`api returned media post ${JSON.stringify(mediaItem)} for query id=${id}`);
+                return mediaItem;
+            }),
         });
+        this.getMedia = (segmentId, sender) => this.workOnMedia([this.lazyGetMediaById(instagram_id_to_url_segment_1.urlSegmentToInstagramId(segmentId))], sender);
         this.sendMedia = (source, ...to) => (msg, text, author) => {
             to.forEach(subscriber => {
                 logger.info(`pushing data${source ? ` of ${koishi_1.Message.ellipseBase64(source)}` : ''} to ${JSON.stringify(subscriber)}`);
@@ -186,39 +269,12 @@ class default_1 {
                 return;
             }
             const currentFeed = lock.feed[lock.workon];
-            logger.debug(`pulling feed ${currentFeed}`);
             const promise = new Promise(resolve => {
                 const match = /https:\/\/www\.instagram\.com\/([^\/]+)/.exec(currentFeed);
                 if (match) {
-                    const feed = this.client.feed.user(lock.threads[currentFeed].id);
-                    const newer = (item) => utils_1.BigNumOps.compare(item.pk, lock.threads[currentFeed].offset) > 0;
-                    const fetchMore = () => new Promise(fetch => {
-                        feed.request().then(response => {
-                            if (response.items.length === 0)
-                                return fetch([]);
-                            if (response.items.every(newer)) {
-                                fetchMore().then(fetched => fetch(response.items.concat(fetched)));
-                            }
-                            else
-                                fetch(response.items.filter(newer));
-                        }, (error) => {
-                            if (error instanceof instagram_private_api_1.IgNetworkError) {
-                                logger.warn(`error on fetching media for ${currentFeed}: ${JSON.stringify(error.cause)}`);
-                                if (!(error instanceof instagram_private_api_1.IgNotFoundError))
-                                    return;
-                                lock.threads[currentFeed].subscribers.forEach(subscriber => {
-                                    logger.info(`sending notfound message of ${currentFeed} to ${JSON.stringify(subscriber)}`);
-                                    this.bot.sendTo(subscriber, `链接 ${currentFeed} 指向的用户或列表不存在,请退订。`).catch();
-                                });
-                            }
-                            else {
-                                logger.error(`unhandled error on fetching media for ${currentFeed}: ${JSON.stringify(error)}`);
-                            }
-                            fetch([]);
-                        });
-                    });
-                    fetchMore().then(resolve);
+                    resolve(this.queryUserMedia(match[1], this.lock.threads[currentFeed].offset));
                 }
+                resolve([]);
             });
             promise.then((mediaItems) => {
                 const currentThread = lock.threads[currentFeed];
@@ -268,25 +324,25 @@ class default_1 {
             logger.warn(`failed to load webshot cookies from file ${this.webshotCookiesLockfile}: `, err.message);
             logger.warn('cookies will be saved to this file when needed');
         }
-        exports.browserLogin = (page) => {
-            logger.warn('blocked by login dialog, trying to log in manually...');
-            return page.type('input[name="username"]', opt.credentials[0])
-                .then(() => page.type('input[name="password"]', opt.credentials[1]))
-                .then(() => page.click('button[type="submit"]'))
-                .then(() => page.click('button:has-text("情報を保存")'))
-                .then(() => page.waitForSelector('img[data-testid="user-avatar"]', { timeout: this.webshotDelay }))
-                .then(() => page.context().cookies())
-                .then(cookies => {
-                this.webshotCookies = cookies;
-                logger.info('successfully logged in, saving cookies to file...');
-                fs.writeFileSync(path.resolve(this.webshotCookiesLockfile), JSON.stringify(cookies, null, 2), 'utf-8');
-            })
-                .catch((err) => {
-                if (err.name === 'TimeoutError')
-                    logger.warn('navigation timed out, assuming login has failed');
-                throw err;
-            });
-        };
+        browserLogin = page => page.fill('input[name="username"]', opt.credentials[0])
+            .then(() => logger.warn('blocked by login dialog, trying to log in manually...'))
+            .then(() => page.fill('input[name="password"]', opt.credentials[1]))
+            .then(() => page.click('button[type="submit"]'))
+            .then(() => page.click('button:has-text("情報を保存")'));
+        browserSaveCookies = page => page.context().cookies()
+            .then(cookies => {
+            this.webshotCookies = cookies;
+            logger.info('successfully logged in, saving cookies to file...');
+            fs.writeFileSync(path.resolve(this.webshotCookiesLockfile), JSON.stringify(cookies, null, 2), 'utf-8');
+        });
+        exports.WebshotHelpers.handleLogin = page => browserLogin(page)
+            .then(() => page.waitForSelector('img[data-testid="user-avatar"]', { timeout: this.webshotDelay }))
+            .then(() => browserSaveCookies(page))
+            .catch((err) => {
+            if (err.name === 'TimeoutError')
+                logger.warn('navigation timed out, assuming login has failed');
+            throw err;
+        });
         ScreenNameNormalizer._queryUser = this.queryUser;
         const parseMediaError = (err) => {
             if (!(err instanceof instagram_private_api_1.IgResponseError && err.text === 'Media not found or unavailable')) {

+ 32 - 24
dist/webshot.js

@@ -49,6 +49,20 @@ class Webshot extends CallableInstance {
             return util_1.promisify(setTimeout)(2500)
                 .then(() => this.connect(onready));
         };
+        this.performOnNewPage = (action, zoomFactor = 2, reconnectOnError = true) => this.browser.newPage({
+            bypassCSP: true,
+            deviceScaleFactor: zoomFactor,
+            locale: 'ja-JP',
+            timezoneId: 'Asia/Tokyo',
+            userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
+        }).then(action)
+            .catch(error => {
+            if (reconnectOnError) {
+                return this.reconnect(error)
+                    .then(() => this.performOnNewPage(action, zoomFactor, reconnectOnError));
+            }
+            throw error;
+        });
         this.renderWebshot = (url, height, webshotDelay) => {
             temp.track();
             const jpeg = (data) => data.pipe(sharp()).jpeg({ quality: 90, trellisQuantisation: true });
@@ -60,14 +74,7 @@ class Webshot extends CallableInstance {
                 const width = 720;
                 const zoomFactor = 2;
                 logger.info(`shooting ${width}*${height} webshot for ${url}`);
-                this.browser.newPage({
-                    bypassCSP: true,
-                    deviceScaleFactor: zoomFactor,
-                    locale: 'ja-JP',
-                    timezoneId: 'Asia/Tokyo',
-                    userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
-                })
-                    .then(page => {
+                this.performOnNewPage(page => {
                     const startTime = new Date().getTime();
                     const getTimerTime = () => new Date().getTime() - startTime;
                     const getTimeout = () => Math.max(500, webshotDelay - getTimerTime());
@@ -76,13 +83,13 @@ class Webshot extends CallableInstance {
                         height: height / zoomFactor,
                     }).then(() => page.context().addCookies(this.getCookies()))
                         .then(() => page.goto(url, { waitUntil: 'load', timeout: getTimeout() }))
+                        .then(() => twitter_1.WebshotHelpers.handleCookieConsent(page))
                         .then(() => ((next) => Promise.race([
-                        page.click('button:has-text("すべて許可")').then(() => twitter_1.browserLogin(page))
+                        twitter_1.WebshotHelpers.handleLogin(page)
                             .then(() => page.goto(url, { waitUntil: 'load', timeout: getTimeout() }))
-                            .then(() => next),
-                        page.click('button:has-text("すべて許可")').then(() => next),
-                        next,
-                    ]))(page.waitForSelector('article', { timeout: getTimeout() })))
+                            .then(next),
+                        next(),
+                    ]))(() => page.waitForSelector('article', { timeout: getTimeout() })))
                         .catch((err) => {
                         if (err.name !== 'TimeoutError')
                             throw err;
@@ -108,7 +115,7 @@ class Webshot extends CallableInstance {
                                 if (this.data[idx(zoomFactor, y)] <= 38 &&
                                     this.data[idx(zoomFactor, y)] === this.data[idx(this.width - zoomFactor, y)] &&
                                     this.data[idx(zoomFactor, y + zoomFactor)] === this.data[idx(zoomFactor, y - 2 * zoomFactor)]) {
-                                    boundary = y;
+                                    boundary = y - 1;
                                     break;
                                 }
                             }
@@ -140,7 +147,7 @@ class Webshot extends CallableInstance {
                         resolve({ path: '', boundary: 0 });
                     })
                         .finally(() => { page.close(); });
-                })
+                }, zoomFactor, false)
                     .catch(reject);
             });
             return promise.then(data => {
@@ -191,20 +198,20 @@ class Webshot extends CallableInstance {
         else {
             this.getCookies = getCookies;
             this.wsUrl = wsUrl;
-            this.connect(onready);
+            this.connect(() => onready(this.performOnNewPage));
         }
     }
-    webshot(mediaItems, callback, webshotDelay) {
-        let promise = new Promise(resolve => {
-            resolve();
-        });
-        mediaItems.forEach(item => {
+    webshot(lazyMediaItems, callback, webshotDelay) {
+        let grandPromise = Promise.resolve();
+        lazyMediaItems.forEach(lazyItem => grandPromise = grandPromise.then(lazyItem.item).then(item => {
+            var _a;
+            let promise = Promise.resolve();
             promise = promise.then(() => {
                 logger.info(`working on ${item.user.username}/${item.code}`);
             });
             let messageChain = '';
             const author = `${item.user.full_name} (@${item.user.username}):\n`;
-            const text = item.caption.text;
+            const text = ((_a = item.caption) === null || _a === void 0 ? void 0 : _a.text) || '';
             if (this.mode > 0)
                 messageChain += (author + xmlEntities.decode(text));
             if (this.mode === 0) {
@@ -251,8 +258,9 @@ class Webshot extends CallableInstance {
                 logger.info(JSON.stringify(koishi_1.Message.ellipseBase64(messageChain)));
                 callback(messageChain, xmlEntities.decode(text), author);
             });
-        });
-        return promise;
+            return promise;
+        }));
+        return grandPromise;
     }
 }
 exports.default = Webshot;

+ 126 - 49
src/twitter.ts

@@ -6,10 +6,9 @@ import {
 } from 'instagram-id-to-url-segment';
 import {
   IgApiClient,
-  IgClientError, IgExactUserNotFoundError, IgNetworkError, IgNotFoundError, IgResponseError,
+  IgClientError, IgExactUserNotFoundError, IgResponseError,
   MediaInfoResponseItemsItem, UserFeedResponseItemsItem
 } from 'instagram-private-api';
-import { RequestError } from 'request-promise/errors';
 
 import { getLogger } from './loggers';
 import QQBot, { Message } from './koishi';
@@ -116,7 +115,19 @@ export class ScreenNameNormalizer {
   }
 }
 
-export let browserLogin = (page: Page): Promise<void> => Promise.reject();
+let browserLogin = (page: Page): Promise<void> => Promise.reject();
+
+let browserSaveCookies = browserLogin;
+
+const acceptCookieConsent = (page: Page) =>
+  page.click('button:has-text("すべて許可")', {timeout: 5000})
+    .then(() => logger.info('accepted cookie consent'))
+    .catch((err: Error) => { if (err.name !== 'TimeoutError') throw err; });
+
+export const WebshotHelpers = {
+  handleLogin: browserLogin,
+  handleCookieConsent: acceptCookieConsent,
+};
 
 export let getPostOwner = (segmentId: string): Promise<string> => Promise.reject();
 
@@ -126,6 +137,11 @@ export let sendPost = (segmentId: string, receiver: IChat): void => {
 
 export type MediaItem = MediaInfoResponseItemsItem & UserFeedResponseItemsItem;
 
+export type LazyMediaItem = {
+  pk: string,
+  item: () => Promise<MediaItem>,
+};
+
 const logger = getLogger('instagram');
 const maxTrials = 3;
 const retryInterval = 1500;
@@ -195,24 +211,27 @@ export default class {
       logger.warn('cookies will be saved to this file when needed');
     }
 
-    browserLogin = (page) => {
-      logger.warn('blocked by login dialog, trying to log in manually...');
-      return page.type('input[name="username"]', opt.credentials[0])
-        .then(() => page.type('input[name="password"]', opt.credentials[1]))
+    browserLogin = page =>
+      page.fill('input[name="username"]', opt.credentials[0])
+        .then(() => logger.warn('blocked by login dialog, trying to log in manually...'))
+        .then(() => page.fill('input[name="password"]', opt.credentials[1]))
         .then(() => page.click('button[type="submit"]'))
-        .then(() => page.click('button:has-text("情報を保存")'))
-        .then(() => page.waitForSelector('img[data-testid="user-avatar"]', {timeout: this.webshotDelay}))
-        .then(() => page.context().cookies())
+        .then(() => page.click('button:has-text("情報を保存")'));
+    browserSaveCookies = page =>
+      page.context().cookies()
         .then(cookies => {
           this.webshotCookies = cookies;
           logger.info('successfully logged in, saving cookies to file...');
           fs.writeFileSync(path.resolve(this.webshotCookiesLockfile), JSON.stringify(cookies, null, 2), 'utf-8');
-        })
+        });
+    WebshotHelpers.handleLogin = page =>
+      browserLogin(page)
+        .then(() => page.waitForSelector('img[data-testid="user-avatar"]', {timeout: this.webshotDelay}))
+        .then(() => browserSaveCookies(page))
         .catch((err: Error) => {
           if (err.name === 'TimeoutError') logger.warn('navigation timed out, assuming login has failed');
           throw err;
         });
-    };
     ScreenNameNormalizer._queryUser = this.queryUser;
     const parseMediaError = (err: IgClientError) => {
       if (!(err instanceof IgResponseError && err.text === 'Media not found or unavailable')) {
@@ -237,27 +256,108 @@ export default class {
       this.wsUrl,
       this.mode,
       () => this.webshotCookies,
-      () => setTimeout(this.work, this.workInterval * 1000)
+      doOnNewPage => {
+        this.queryUserMedia = ((userName, targetId) => {
+          let page: Page;
+          const url = linkBuilder({userName});
+          logger.debug(`pulling ${targetId !== '0' ? `feed ${url} up to ${targetId}` : `top of feed ${url}`}...`);
+          return doOnNewPage(newPage => {
+            page = newPage;
+            let timeout = this.webshotDelay;
+            const startTime = new Date().getTime();
+            const getTimerTime = () => new Date().getTime() - startTime;
+            const getTimeout = () => Math.max(500, timeout - getTimerTime());
+            return page.context().addCookies(this.webshotCookies)
+              .then(() => page.goto(url, {waitUntil: 'load', timeout: getTimeout()}))
+              .then(response => {
+                if (response.status() !== 200) {
+                  const err = new Error(
+                    `error navigating to user page, error was: ${response.status()} ${response.statusText()}`
+                  );
+                  throw Object.defineProperty(err, 'name', {
+                    value: 'ResponseError',
+                  });
+                }
+              }).then(() => acceptCookieConsent(page))
+              .then(() => 
+                (next => Promise.race([
+                  browserLogin(page)
+                    .catch((err: Error) => {
+                      if (err.name === 'TimeoutError') logger.warn('navigation timed out, assuming login has failed');
+                      throw err;
+                    })
+                    .then(() => browserSaveCookies(page))
+                    .then(() => page.goto(url)).then(next),
+                  next(),
+                ]))(() => page.waitForSelector('article', {timeout: getTimeout()}))
+              ).then(handle => {
+                const postHandler = () => {
+                  const toId = (href: string) => urlSegmentToId((/\/p\/(.*)\/$/.exec(href) ?? [])[1]);
+                  if (targetId === '0') {
+                    return handle.$$eval('a', as =>
+                      as.filter(a => !a.querySelector('[aria-label="IGTV"]'))[0].href
+                    ).then(href => href ? [toId(href)] : null);
+                  }
+                  return handle.$$eval('a', as =>
+                    as.filter(a => !a.querySelector('[aria-label="IGTV"]')).map(a => a.href)
+                  ).then(hrefs => {
+                    let id: string;
+                    const itemIds: string[] = [];
+                    for (const href of hrefs) {
+                      id = toId(href);
+                      if (id && BigNumOps.compare(id, targetId) > 0) itemIds.push(id);
+                      else return itemIds;
+                    }
+                    return null; // has more
+                  });
+                };
+                return postHandler().then(itemIds => {
+                  if (itemIds) return itemIds;
+                  timeout += this.webshotDelay / 2;
+                  return handle.$$('a')
+                    .then(as => { as.pop().scrollIntoViewIfNeeded(); return as.length + 1; })
+                    .then(loadedCount => page.waitForFunction(count =>
+                      document.querySelectorAll('article a').length > count
+                    , loadedCount))
+                    .then(postHandler);
+                });
+              }).catch((err: Error) => {
+                if (err.name !== 'TimeoutError' && err.name !== 'ResponseError') throw err;
+                if (err.name === 'ResponseError') {
+                  logger.warn(`error while fetching tweets for ${userName}: ${err.message}`);
+                } else logger.warn(`navigation timed out at ${getTimerTime()} ms`);
+                return [] as string[];
+              }).then(itemIds => itemIds.map(id => this.lazyGetMediaById(id)));
+          }).finally(() => { page.close(); });
+        });
+        setTimeout(this.work, this.workInterval * 1000);
+      }
     );
   };
 
+  public queryUserMedia: (username: string, targetId?: string) => Promise<LazyMediaItem[]>;
+
   public queryUser = (username: string) => this.client.user.searchExact(username)
     .then(user => `${user.username}:${user.pk}`);
 
   private workOnMedia = (
-    mediaItems: MediaItem[],
+    lazyMediaItems: LazyMediaItem[],
     sendMedia: (msg: string, text: string, author: string) => void
-  ) => this.webshot(mediaItems, sendMedia, this.webshotDelay);
+  ) => this.webshot(lazyMediaItems, sendMedia, this.webshotDelay);
 
   public urlSegmentToId = urlSegmentToId;
 
-  public getMedia = (segmentId: string, sender: (msg: string, text: string, author: string) => void) =>
-    this.client.media.info(urlSegmentToId(segmentId))
-      .then(media => {
-        const mediaItem = media.items[0] as MediaItem;
-        logger.debug(`api returned media post ${JSON.stringify(mediaItem)} for query id=${segmentId}`);
-        return this.workOnMedia([mediaItem], sender);
-      });
+  public lazyGetMediaById = (id: string): LazyMediaItem => ({
+    pk: id,
+    item: () => this.client.media.info(id).then(media => {
+      const mediaItem = media.items[0] as MediaItem;
+      logger.debug(`api returned media post ${JSON.stringify(mediaItem)} for query id=${id}`);
+      return mediaItem;
+    }),
+  });
+
+  private getMedia = (segmentId: string, sender: (msg: string, text: string, author: string) => void) =>
+    this.workOnMedia([this.lazyGetMediaById(urlSegmentToId(segmentId))], sender);
 
   private sendMedia = (source?: string, ...to: IChat[]) => (msg: string, text: string, author: string) => {
     to.forEach(subscriber => {
@@ -298,39 +398,16 @@ export default class {
     }
 
     const currentFeed = lock.feed[lock.workon];
-    logger.debug(`pulling feed ${currentFeed}`);
 
-    const promise = new Promise<UserFeedResponseItemsItem[]>(resolve => {
+    const promise = new Promise<LazyMediaItem[]>(resolve => {
       const match = /https:\/\/www\.instagram\.com\/([^\/]+)/.exec(currentFeed);
       if (match) {
-        const feed = this.client.feed.user(lock.threads[currentFeed].id);
-        const newer = (item: UserFeedResponseItemsItem) =>
-          BigNumOps.compare(item.pk, lock.threads[currentFeed].offset) > 0;
-        const fetchMore = () => new Promise<UserFeedResponseItemsItem[]>(fetch => {
-          feed.request().then(response => {
-            if (response.items.length === 0) return fetch([]);
-            if (response.items.every(newer)) {
-              fetchMore().then(fetched => fetch(response.items.concat(fetched)));
-            } else fetch(response.items.filter(newer));
-          }, (error: IgClientError & Partial<RequestError>) => {
-            if (error instanceof IgNetworkError) {
-              logger.warn(`error on fetching media for ${currentFeed}: ${JSON.stringify(error.cause)}`);
-              if (!(error instanceof IgNotFoundError)) return;
-              lock.threads[currentFeed].subscribers.forEach(subscriber => {
-                logger.info(`sending notfound message of ${currentFeed} to ${JSON.stringify(subscriber)}`);
-                this.bot.sendTo(subscriber, `链接 ${currentFeed} 指向的用户或列表不存在,请退订。`).catch();
-              });
-            } else {
-              logger.error(`unhandled error on fetching media for ${currentFeed}: ${JSON.stringify(error)}`);
-            }
-            fetch([]);
-          });
-        });
-        fetchMore().then(resolve);
+        resolve(this.queryUserMedia(match[1], this.lock.threads[currentFeed].offset));
       }
+      resolve([]);
     });
 
-    promise.then((mediaItems: MediaItem[]) => {
+    promise.then((mediaItems: LazyMediaItem[]) => {
       const currentThread = lock.threads[currentFeed];
 
       const updateDate = () => currentThread.updatedAt = new Date().toString();

+ 44 - 28
src/webshot.ts

@@ -13,7 +13,7 @@ import * as temp from 'temp';
 import { getLogger } from './loggers';
 import { Message } from './koishi';
 import { chainPromises } from './utils';
-import { browserLogin, linkBuilder, MediaItem } from './twitter';
+import { linkBuilder, MediaItem, LazyMediaItem, WebshotHelpers } from './twitter';
 
 const xmlEntities = new XmlEntities();
 
@@ -32,14 +32,18 @@ const logger = getLogger('webshot');
 export type Page = puppeteer.Page;
 export type Cookies = puppeteer.Cookie[];
 
-class Webshot extends CallableInstance<[MediaItem[], (...args) => void, number], Promise<void>> {
+class Webshot extends CallableInstance<[LazyMediaItem[], (...args) => void, number], Promise<void>> {
 
   private browser: puppeteer.Browser;
   private mode: number;
   private wsUrl: string;
   private getCookies: () => Cookies;
 
-  constructor(wsUrl: string, mode: number, getCookies: () => Cookies, onready?: (...args) => void) {
+  constructor(
+    wsUrl: string, mode: number,
+    getCookies: () => Cookies,
+    onready?: (doOnNewPage?: typeof Webshot.prototype.performOnNewPage) => void
+  ) {
     super('webshot');
     // tslint:disable-next-line: no-conditional-assignment
     // eslint-disable-next-line no-cond-assign
@@ -48,7 +52,7 @@ class Webshot extends CallableInstance<[MediaItem[], (...args) => void, number],
     } else {
       this.getCookies = getCookies;
       this.wsUrl = wsUrl;
-      this.connect(onready);
+      this.connect(() => onready(this.performOnNewPage));
     }
   }
 
@@ -74,6 +78,22 @@ class Webshot extends CallableInstance<[MediaItem[], (...args) => void, number],
       .then(() => this.connect(onready));
   };
 
+  private performOnNewPage = <T>(action: (page: Page) => T | PromiseLike<T>, zoomFactor = 2, reconnectOnError = true) =>
+    this.browser.newPage({
+      bypassCSP: true,
+      deviceScaleFactor: zoomFactor,
+      locale: 'ja-JP',
+      timezoneId: 'Asia/Tokyo',
+      userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
+    }).then(action)
+      .catch(error => {
+        if (reconnectOnError) {
+          return this.reconnect(error)
+            .then((): Promise<T> => this.performOnNewPage(action, zoomFactor, reconnectOnError));
+        }
+        throw error;
+      });
+
   private renderWebshot = (url: string, height: number, webshotDelay: number): Promise<string> => {
     temp.track();
     const jpeg = (data: Readable) => data.pipe(sharp()).jpeg({quality: 90, trellisQuantisation: true});
@@ -85,14 +105,8 @@ class Webshot extends CallableInstance<[MediaItem[], (...args) => void, number],
       const width = 720;
       const zoomFactor = 2;
       logger.info(`shooting ${width}*${height} webshot for ${url}`);
-      this.browser.newPage({
-        bypassCSP: true,
-        deviceScaleFactor: zoomFactor,
-        locale: 'ja-JP',
-        timezoneId: 'Asia/Tokyo',
-        userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
-      })
-        .then(page => {
+      this.performOnNewPage(
+        page => {
           const startTime = new Date().getTime();
           const getTimerTime = () => new Date().getTime() - startTime;
           const getTimeout = () => Math.max(500, webshotDelay - getTimerTime());
@@ -101,14 +115,14 @@ class Webshot extends CallableInstance<[MediaItem[], (...args) => void, number],
             height: height / zoomFactor,
           }).then(() => page.context().addCookies(this.getCookies()))
             .then(() => page.goto(url, {waitUntil: 'load', timeout: getTimeout()}))
+            .then(() => WebshotHelpers.handleCookieConsent(page))
             .then(() =>
-              (<T>(next: Promise<T>) => Promise.race([
-                page.click('button:has-text("すべて許可")').then(() => browserLogin(page))
+              (<T>(next: () => Promise<T>) => Promise.race([
+                WebshotHelpers.handleLogin(page)
                   .then(() => page.goto(url, {waitUntil: 'load', timeout: getTimeout()}))
-                  .then(() => next),
-                page.click('button:has-text("すべて許可")').then(() => next),
-                next,
-              ]))(page.waitForSelector('article', {timeout: getTimeout()}))
+                  .then(next),
+                next(),
+              ]))(() => page.waitForSelector('article', {timeout: getTimeout()}))
             )
             .catch((err: Error): Promise<puppeteer.ElementHandle<Element> | null> => {
               if (err.name !== 'TimeoutError') throw err;
@@ -138,7 +152,7 @@ class Webshot extends CallableInstance<[MediaItem[], (...args) => void, number],
                     this.data[idx(zoomFactor, y)] === this.data[idx(this.width - zoomFactor, y)] &&
                     this.data[idx(zoomFactor, y + zoomFactor)] === this.data[idx(zoomFactor, y - 2 * zoomFactor)]
                   ) {
-                    boundary = y;
+                    boundary = y - 1;
                     break;
                   }
                 }
@@ -168,7 +182,8 @@ class Webshot extends CallableInstance<[MediaItem[], (...args) => void, number],
               resolve({path: '', boundary: 0});
             })
             .finally(() => { page.close(); });
-        })
+        },
+        zoomFactor, false)
         .catch(reject);
     });
     return promise.then(data => {
@@ -216,14 +231,14 @@ class Webshot extends CallableInstance<[MediaItem[], (...args) => void, number],
   );
 
   public webshot(
-    mediaItems: MediaItem[],
+    lazyMediaItems: LazyMediaItem[],
     callback: (msgs: string, text: string, author: string) => void,
     webshotDelay: number
   ): Promise<void> {
-    let promise = new Promise<void>(resolve => {
-      resolve();
-    });
-    mediaItems.forEach(item => {
+    let grandPromise = Promise.resolve();
+    // eslint-disable-next-line @typescript-eslint/no-misused-promises
+    lazyMediaItems.forEach(lazyItem => grandPromise = grandPromise.then(lazyItem.item).then(item => {
+      let promise = Promise.resolve();
       promise = promise.then(() => {
         logger.info(`working on ${item.user.username}/${item.code}`);
       });
@@ -231,7 +246,7 @@ class Webshot extends CallableInstance<[MediaItem[], (...args) => void, number],
 
       // text processing
       const author = `${item.user.full_name} (@${item.user.username}):\n`;
-      const text = item.caption.text;
+      const text = item.caption?.text || '';
       if (this.mode > 0) messageChain += (author + xmlEntities.decode(text));
 
       // invoke webshot
@@ -286,8 +301,9 @@ class Webshot extends CallableInstance<[MediaItem[], (...args) => void, number],
         logger.info(JSON.stringify(Message.ellipseBase64(messageChain)));
         callback(messageChain, xmlEntities.decode(text), author);
       });
-    });
-    return promise;
+      return promise;
+    }));
+    return grandPromise;
   }
 }