Browse Source

ditch ?__a=1 to cope with renamed users

Mike L 3 years ago
parent
commit
9fbd58e465
2 changed files with 35 additions and 25 deletions
  1. + 17 - 13
      dist/twitter.js
  2. + 18 - 12
      src/twitter.ts

+ 17 - 13
dist/twitter.js

@@ -196,11 +196,13 @@ class default_1 {
         this.webshotCookies = [];
         this.launch = () => {
             this.webshot = new webshot_1.default(this.wsUrl, this.mode, () => this.webshotCookies, doOnNewPage => {
-                this.queryUserMedia = ((userName, targetId) => {
+                this.queryUserMedia = ((username, targetId) => {
                     let page;
-                    let url = linkBuilder({ userName }) + '?__a=1';
-                    logger.debug(`pulling ${targetId !== '0' ? `feed ${url} up to ${targetId}` : `top of feed ${url}`}...`);
-                    return doOnNewPage(newPage => {
+                    let url;
+                    return (username.includes(':') ? Promise.resolve(username) : this.queryUser(username)).then(userNameId => doOnNewPage(newPage => {
+                        const [userName, userId] = userNameId.split(':');
+                        url = graphqlLinkBuilder({ userId });
+                        logger.debug(`pulling ${targetId !== '0' ? `feed ${url} up to ${targetId}` : `top of feed ${url}`}...`);
                         page = newPage;
                         let timeout = this.webshotDelay / 2;
                         const startTime = new Date().getTime();
@@ -253,7 +255,7 @@ class default_1 {
                                 if (!(pageInfo === null || pageInfo === void 0 ? void 0 : pageInfo.has_next_page))
                                     return itemIds;
                                 logger.info('unable to find a smaller id than target, trying on next page...');
-                                url = graphqlLinkBuilder({ userId: user.id, after: pageInfo.end_cursor });
+                                url = graphqlLinkBuilder({ userId, after: pageInfo.end_cursor });
                                 const nextPageDelay = this.webshotDelay * (0.4 + Math.random() * 0.1);
                                 timeout += nextPageDelay;
                                 return util_1.promisify(setTimeout)(nextPageDelay)
@@ -262,18 +264,19 @@ class default_1 {
                                     .then(({ data }) => jsonHandler(data));
                             };
                             return responseHandler(response)
-                                .then(({ graphql }) => jsonHandler(graphql));
+                                .then(({ data }) => jsonHandler(data));
                         }).catch((err) => {
-                            if (err.name !== 'TimeoutError' && err.name !== 'ResponseError')
-                                throw err;
-                            if (err.name === 'ResponseError') {
-                                logger.warn(`error while fetching posts by @${userName}: ${err.message}`);
+                            if (err.name === 'ResponseError' || err.name === 'TypeError') {
+                                logger.warn(`error while fetching posts by @${userName}: ${err}`);
                             }
-                            else
+                            else if (err.name === 'TimeoutError') {
                                 logger.warn(`navigation timed out at ${getTimerTime()} ms`);
+                            }
+                            else
+                                throw err;
                             return [];
                         }).then(itemIds => util_1.promisify(setTimeout)(getTimeout()).then(() => itemIds.map(id => this.lazyGetMediaById(id))));
-                    }).finally(() => { page.close(); });
+                    })).finally(() => { page.close(); });
                 });
                 setTimeout(this.work, this.workInterval * 1000 / this.lock.feed.length);
             });
@@ -318,7 +321,8 @@ class default_1 {
                 logger.error(`current feed "${feed}" is invalid, please remove this feed manually`);
                 return resolve([]);
             }
-            return resolve(this.queryUserMedia(match[1], this.lock.threads[feed].offset)
+            const userNameId = `${match[1]}:${this.lock.threads[feed].id}`;
+            return resolve(this.queryUserMedia(userNameId, this.lock.threads[feed].offset)
                 .catch((error) => {
                 logger.error(`error scraping media off profile page of ${match[1]}, error: ${error}`);
                 return [];

+ 18 - 12
src/twitter.ts

@@ -403,11 +403,15 @@ export default class {
       this.mode,
       () => this.webshotCookies,
       doOnNewPage => {
-        this.queryUserMedia = ((userName, targetId) => {
+        this.queryUserMedia = ((username, targetId) => {
           let page: Page;
-          let url = linkBuilder({userName}) + '?__a=1';
-          logger.debug(`pulling ${targetId !== '0' ? `feed ${url} up to ${targetId}` : `top of feed ${url}`}...`);
-          return doOnNewPage(newPage => {
+          let url: string;
+          return (
+            username.includes(':') ? Promise.resolve(username) : this.queryUser(username)
+          ).then(userNameId => doOnNewPage(newPage => {
+            const [userName, userId] = userNameId.split(':');
+            url = graphqlLinkBuilder({userId});
+            logger.debug(`pulling ${targetId !== '0' ? `feed ${url} up to ${targetId}` : `top of feed ${url}`}...`);
             page = newPage;
             let timeout = this.webshotDelay / 2;
             const startTime = new Date().getTime();
@@ -463,7 +467,7 @@ export default class {
                   if (!pageInfo?.has_next_page) return itemIds;
                   // else, fetch next page using end_cursor
                   logger.info('unable to find a smaller id than target, trying on next page...');
-                  url = graphqlLinkBuilder({userId: user.id, after: pageInfo.end_cursor});
+                  url = graphqlLinkBuilder({userId, after: pageInfo.end_cursor});
                   const nextPageDelay = this.webshotDelay * (0.4 + Math.random() * 0.1);
                   timeout += nextPageDelay;
                   return promisify(setTimeout)(nextPageDelay)
@@ -472,17 +476,18 @@ export default class {
                     .then(({data}: {data: {user: IgGraphQLUser}}) => jsonHandler(data));
                 };
                 return responseHandler(response)
-                  .then(({graphql}: {graphql: {user: IgGraphQLUser}}) => jsonHandler(graphql));
+                  .then(({data}: {data: {user: IgGraphQLUser}}) => jsonHandler(data));
               }).catch((err: Error) => {
-                if (err.name !== 'TimeoutError' && err.name !== 'ResponseError') throw err;
-                if (err.name === 'ResponseError') {
-                  logger.warn(`error while fetching posts by @${userName}: ${err.message}`);
-                } else logger.warn(`navigation timed out at ${getTimerTime()} ms`);
+                if (err.name === 'ResponseError' || err.name === 'TypeError') {
+                  logger.warn(`error while fetching posts by @${userName}: ${err}`);
+                } else if (err.name === 'TimeoutError') {
+                  logger.warn(`navigation timed out at ${getTimerTime()} ms`);
+                } else throw err;
                 return [] as string[];
               }).then(itemIds => promisify(setTimeout)(getTimeout()).then(() =>
                 itemIds.map(id => this.lazyGetMediaById(id))
               ));
-          }).finally(() => { page.close(); });
+          })).finally(() => { page.close(); });
         });
         setTimeout(this.work, this.workInterval * 1000 / this.lock.feed.length);
       }
@@ -547,7 +552,8 @@ export default class {
       logger.error(`current feed "${feed}" is invalid, please remove this feed manually`);
       return resolve([]);
     }
-    return resolve(this.queryUserMedia(match[1], this.lock.threads[feed].offset)
+    const userNameId = `${match[1]}:${this.lock.threads[feed].id}`;
+    return resolve(this.queryUserMedia(userNameId, this.lock.threads[feed].offset)
       .catch((error: Error) => {
         logger.error(`error scraping media off profile page of ${match[1]}, error: ${error}`);
         return [];