ソースを参照

switch to using ?__a=1 and graphql api

Mike L 3 年 前
コミット
0b58040b1d
2 ファイル変更、129 行追加、104 行削除
  1. + 46 - 48
      dist/twitter.js
  2. + 83 - 56
      src/twitter.ts

+ 46 - 48
dist/twitter.js

@@ -9,7 +9,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
     });
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.sendPost = exports.getPostOwner = exports.WebshotHelpers = exports.ScreenNameNormalizer = exports.SessionManager = exports.urlSegmentToId = exports.idToUrlSegment = exports.isValidUrlSegment = exports.parseLink = exports.linkBuilder = void 0;
+exports.sendPost = exports.getPostOwner = exports.WebshotHelpers = exports.ScreenNameNormalizer = exports.SessionManager = exports.urlSegmentToId = exports.idToUrlSegment = exports.isValidUrlSegment = exports.parseLink = exports.linkBuilder = exports.graphqlLinkBuilder = void 0;
 const crypto = require("crypto");
 const fs = require("fs");
 const http = require("http");
@@ -44,6 +44,9 @@ const linkBuilder = (config) => {
         return `https://www.instagram.com/p/${config.postUrlSegment}/`;
 };
 exports.linkBuilder = linkBuilder;
+const graphqlLinkBuilder = ({ userId, first = '12', after }) => `https://www.instagram.com/graphql/query/\
+?query_id=17888483320059182&id=${userId}&first=${first}${after ? `&after=${after}` : ''}`;
+exports.graphqlLinkBuilder = graphqlLinkBuilder;
 const urlSegmentToId = (urlSegment) => urlSegment.length <= 28 ?
     instagram_id_to_url_segment_1.urlSegmentToInstagramId(urlSegment) : instagram_id_to_url_segment_1.urlSegmentToInstagramId(urlSegment.slice(0, -28));
 exports.urlSegmentToId = urlSegmentToId;
@@ -197,65 +200,60 @@ class default_1 {
             this.webshot = new webshot_1.default(this.wsUrl, this.mode, () => this.webshotCookies, doOnNewPage => {
                 this.queryUserMedia = ((userName, targetId) => {
                     let page;
-                    const url = linkBuilder({ userName });
+                    let url = linkBuilder({ userName }) + '?__a=1';
                     logger.debug(`pulling ${targetId !== '0' ? `feed ${url} up to ${targetId}` : `top of feed ${url}`}...`);
                     return doOnNewPage(newPage => {
                         page = newPage;
-                        let timeout = this.webshotDelay;
                         const startTime = new Date().getTime();
                         const getTimerTime = () => new Date().getTime() - startTime;
-                        const getTimeout = () => isWaitingForLogin ? 0 : Math.max(90000, timeout - getTimerTime());
+                        const getTimeout = () => isWaitingForLogin ? 0 : Math.max(90000, this.webshotDelay - getTimerTime());
                         return page.context().addCookies(this.webshotCookies)
                             .then(() => page.goto(url, { waitUntil: 'load', timeout: getTimeout() }))
                             .then(response => {
-                            if (response.status() !== 200) {
-                                const err = new Error(`error navigating to user page, error was: ${response.status()} ${response.statusText()}`);
-                                throw Object.defineProperty(err, 'name', {
-                                    value: 'ResponseError',
-                                });
-                            }
-                        }).then(() => acceptCookieConsent(page))
-                            .then(() => (next => Promise.race([
-                            browserLogin(page)
-                                .catch((err) => {
-                                if (err.name === 'TimeoutError') {
-                                    logger.warn('navigation timed out, assuming login has failed');
-                                    isWaitingForLogin = false;
+                            const responseHandler = (res) => {
+                                if (res.status() === 302) {
+                                    return acceptCookieConsent(page)
+                                        .then(() => browserLogin(page))
+                                        .catch((err) => {
+                                        if (err.name === 'TimeoutError') {
+                                            logger.warn('navigation timed out, assuming login has failed');
+                                            isWaitingForLogin = false;
+                                        }
+                                        throw err;
+                                    })
+                                        .then(() => browserSaveCookies(page))
+                                        .then(() => page.goto(url, { waitUntil: 'load', timeout: getTimeout() }))
+                                        .then(responseHandler);
                                 }
-                                throw err;
-                            })
-                                .then(() => browserSaveCookies(page))
-                                .then(() => page.goto(url)).then(next),
-                            next(),
-                        ]))(() => util_1.promisify(setTimeout)(2000).then(() => page.waitForSelector('article', { timeout: getTimeout() })))).then(handle => {
-                            const postHandler = () => {
-                                const toId = (href) => { var _a; return urlSegmentToId(((_a = /\/p\/(.*)\/$/.exec(href)) !== null && _a !== void 0 ? _a : [, ''])[1]); };
-                                if (targetId === '0') {
-                                    return handle.$$eval('a', as => as.filter(a => !a.querySelector('[aria-label="IGTV"]'))[0].href).then(href => Number(toId(href)) > 0 ? [toId(href)] : []);
+                                if (res.status() !== 200) {
+                                    const err = new Error(`error navigating to user page, error was: ${res.status()} ${res.statusText()}`);
+                                    throw Object.defineProperty(err, 'name', {
+                                        value: 'ResponseError',
+                                    });
                                 }
-                                return handle.$$eval('a', as => as.filter(a => !a.querySelector('[aria-label="IGTV"]')).map(a => a.href)).then(hrefs => {
-                                    let id;
-                                    const itemIds = [];
-                                    for (const href of hrefs) {
-                                        id = toId(href);
-                                        if (id && utils_1.BigNumOps.compare(id, targetId) > 0)
-                                            itemIds.push(id);
-                                        else
-                                            return itemIds;
-                                    }
-                                    logger.info('unable to find a smaller id than target, trying on next page...');
-                                    return null;
-                                });
+                                return res.json();
                             };
-                            return postHandler().then(itemIds => {
-                                if (itemIds)
+                            const jsonHandler = ({ user }) => {
+                                const pageInfo = user.edge_owner_to_timeline_media.page_info;
+                                const itemIds = [];
+                                for (const { node } of user.edge_owner_to_timeline_media.edges) {
+                                    if (node.__typename === 'GraphVideo' && node.product_type === 'igtv')
+                                        continue;
+                                    if (node.id && utils_1.BigNumOps.compare(node.id, targetId) > 0)
+                                        itemIds.push(node.id);
+                                    else
+                                        return itemIds;
+                                }
+                                if (!pageInfo.has_next_page)
                                     return itemIds;
-                                timeout += this.workInterval * 500;
-                                return handle.$$('a')
-                                    .then(as => { as.pop().scrollIntoViewIfNeeded(); return as.length + 1; })
-                                    .then(loadedCount => page.waitForFunction(count => document.querySelectorAll('article a').length > count, loadedCount))
-                                    .then(postHandler);
-                            });
+                                logger.info('unable to find a smaller id than target, trying on next page...');
+                                url = graphqlLinkBuilder({ userId: user.id, after: pageInfo.end_cursor });
+                                return page.goto(url, { waitUntil: 'load', timeout: getTimeout() })
+                                    .then(responseHandler)
+                                    .then(({ data }) => jsonHandler(data));
+                            };
+                            return responseHandler(response)
+                                .then(({ graphql }) => jsonHandler(graphql));
                         }).catch((err) => {
                             if (err.name !== 'TimeoutError' && err.name !== 'ResponseError')
                                 throw err;

+ 83 - 56
src/twitter.ts

@@ -39,10 +39,14 @@ const linkBuilder = (config: ReturnType<typeof parseLink>): string => {
   if (config.postUrlSegment) return `https://www.instagram.com/p/${config.postUrlSegment}/`;
 };
 
+const graphqlLinkBuilder = ({userId, first = '12', after}: {userId: string, first?: string, after?: string}) =>
+  `https://www.instagram.com/graphql/query/\
+?query_id=17888483320059182&id=${userId}&first=${first}${after ? `&after=${after}` : ''}`;
+
 const urlSegmentToId = (urlSegment: string) => urlSegment.length <= 28 ?
   pubUrlSegmentToId(urlSegment) : pubUrlSegmentToId(urlSegment.slice(0, -28));
 
-export { linkBuilder, parseLink, isValidUrlSegment, idToUrlSegment, urlSegmentToId };
+export { graphqlLinkBuilder, linkBuilder, parseLink, isValidUrlSegment, idToUrlSegment, urlSegmentToId };
 
 interface IWorkerOption {
   sessionLockfile: string;
@@ -194,6 +198,37 @@ export let sendPost = (segmentId: string, receiver: IChat): void => {
   throw Error();
 };
 
+type IgGraphQLTimelineMediaNode = {
+  id: string,
+  display_url: string,
+  owner: {
+    id: string,
+    username?: string,
+  },
+} & (
+  {__typename: 'GraphImage'} |
+  {__typename: 'GraphSidecar', edge_sidecar_to_children: {
+    edges: {node: (IgGraphQLTimelineMediaNode & {__typename: 'GraphImage'})}[],
+  }, } |
+  {__typename: 'GraphVideo', video_url: string, product_type?: 'igtv' }
+);
+
+export type IgGraphQLUser = {
+  biography?: string,
+  fbid: string,
+  full_name: string,
+  id: string,
+  username: string,
+  edge_owner_to_timeline_media: {
+    count: number,
+    page_info: {
+      has_next_page: boolean,
+      end_cursor: string | null,
+    },
+    edges: {node: IgGraphQLTimelineMediaNode}[],
+  },
+};
+
 export type MediaItem = MediaInfoResponseItemsItem & UserFeedResponseItemsItem;
 
 export type LazyMediaItem = {
@@ -319,7 +354,10 @@ export default class {
         .then(() => page.waitForSelector('img[data-testid="user-avatar"]', { timeout: this.webshotDelay }))
         .then(() => browserSaveCookies(page))
         .catch((err: Error) => {
-          if (err.name === 'TimeoutError') { logger.warn('navigation timed out, assuming login has failed'); isWaitingForLogin = false; }
+          if (err.name === 'TimeoutError') {
+            logger.warn('navigation timed out, assuming login has failed');
+            isWaitingForLogin = false;
+          }
           throw err;
         });
     ScreenNameNormalizer._queryUser = this.queryUser;
@@ -349,69 +387,58 @@ export default class {
       doOnNewPage => {
         this.queryUserMedia = ((userName, targetId) => {
           let page: Page;
-          const url = linkBuilder({ userName });
+          let url = linkBuilder({userName}) + '?__a=1';
           logger.debug(`pulling ${targetId !== '0' ? `feed ${url} up to ${targetId}` : `top of feed ${url}`}...`);
           return doOnNewPage(newPage => {
             page = newPage;
-            let timeout = this.webshotDelay;
             const startTime = new Date().getTime();
             const getTimerTime = () => new Date().getTime() - startTime;
-            const getTimeout = () => isWaitingForLogin ? 0 : Math.max(90000, timeout - getTimerTime());
+            const getTimeout = () => isWaitingForLogin ? 0 : Math.max(90000, this.webshotDelay - getTimerTime());
             return page.context().addCookies(this.webshotCookies)
-              .then(() => page.goto(url, { waitUntil: 'load', timeout: getTimeout() }))
+              .then(() => page.goto(url, {waitUntil: 'load', timeout: getTimeout()}))
               .then(response => {
-                if (response.status() !== 200) {
-                  const err = new Error(
-                    `error navigating to user page, error was: ${response.status()} ${response.statusText()}`
-                  );
-                  throw Object.defineProperty(err, 'name', {
-                    value: 'ResponseError',
-                  });
-                }
-              }).then(() => acceptCookieConsent(page))
-              .then(() =>
-                (next => Promise.race([
-                  browserLogin(page)
-                    .catch((err: Error) => {
-                      if (err.name === 'TimeoutError') { logger.warn('navigation timed out, assuming login has failed'); isWaitingForLogin = false; }
-                      throw err;
-                    })
-                    .then(() => browserSaveCookies(page))
-                    .then(() => page.goto(url)).then(next),
-                  next(),
-                ]))(() => promisify(setTimeout)(2000).then(() => page.waitForSelector('article', {timeout: getTimeout()})))
-              ).then(handle => {
-                const postHandler = () => {
-                  const toId = (href: string) => urlSegmentToId((/\/p\/(.*)\/$/.exec(href) ?? [,''])[1]);
-                  if (targetId === '0') {
-                    return handle.$$eval('a', as =>
-                      as.filter(a => !a.querySelector('[aria-label="IGTV"]'))[0].href
-                    ).then(href => Number(toId(href)) > 0 ? [toId(href)] : []);
+                const responseHandler = (res: typeof response): ReturnType<typeof response.json> => {
+                  if (res.status() === 302) {
+                    return acceptCookieConsent(page)
+                      .then(() => browserLogin(page))
+                      .catch((err: Error) => {
+                        if (err.name === 'TimeoutError') {
+                          logger.warn('navigation timed out, assuming login has failed');
+                          isWaitingForLogin = false;
+                        }
+                        throw err;
+                      })
+                      .then(() => browserSaveCookies(page))
+                      .then(() => page.goto(url, {waitUntil: 'load', timeout: getTimeout()}))
+                      .then(responseHandler);
+                  }
+                  if (res.status() !== 200) {
+                    const err = new Error(
+                      `error navigating to user page, error was: ${res.status()} ${res.statusText()}`
+                    );
+                    throw Object.defineProperty(err, 'name', {
+                      value: 'ResponseError',
+                    });
+                  }
+                  return res.json();
+                };
+                const jsonHandler = ({user}: {user: IgGraphQLUser}): string[] | Promise<string[]> => {
+                  const pageInfo = user.edge_owner_to_timeline_media.page_info;
+                  const itemIds: string[] = [];
+                  for (const {node} of user.edge_owner_to_timeline_media.edges) {
+                    if (node.__typename === 'GraphVideo' && node.product_type === 'igtv') continue;
+                    if (node.id && BigNumOps.compare(node.id, targetId) > 0) itemIds.push(node.id);
+                    else return itemIds;
                   }
-                  return handle.$$eval('a', as =>
-                    as.filter(a => !a.querySelector('[aria-label="IGTV"]')).map(a => a.href)
-                  ).then(hrefs => {
-                    let id: string;
-                    const itemIds: string[] = [];
-                    for (const href of hrefs) {
-                      id = toId(href);
-                      if (id && BigNumOps.compare(id, targetId) > 0) itemIds.push(id);
-                      else return itemIds;
-                    }
-                    logger.info('unable to find a smaller id than target, trying on next page...');
-                    return null; // has more
-                  });
+                  if (!pageInfo.has_next_page) return itemIds;
+                  logger.info('unable to find a smaller id than target, trying on next page...');
+                  url = graphqlLinkBuilder({userId: user.id, after: pageInfo.end_cursor});
+                  return page.goto(url, {waitUntil: 'load', timeout: getTimeout()})
+                    .then(responseHandler)
+                    .then(({data}: {data: {user: IgGraphQLUser}}) => jsonHandler(data));
                 };
-                return postHandler().then(itemIds => {
-                  if (itemIds) return itemIds;
-                  timeout += this.workInterval * 500;
-                  return handle.$$('a')
-                    .then(as => { as.pop().scrollIntoViewIfNeeded(); return as.length + 1; })
-                    .then(loadedCount => page.waitForFunction(count =>
-                      document.querySelectorAll('article a').length > count
-                    , loadedCount))
-                    .then(postHandler);
-                });
+                return responseHandler(response)
+                  .then(({graphql}: {graphql: {user: IgGraphQLUser}}) => jsonHandler(graphql));
               }).catch((err: Error) => {
                 if (err.name !== 'TimeoutError' && err.name !== 'ResponseError') throw err;
                 if (err.name === 'ResponseError') {