downloader.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435
  1. import os
  2. import shutil
  3. import subprocess
  4. import sys
  5. import threading
  6. import time
  7. import shlex
  8. from instagram_private_api import ClientConnectionError
  9. from instagram_private_api import ClientError
  10. from instagram_private_api import ClientThrottledError
  11. from instagram_private_api_extensions import live
  12. from instagram_private_api_extensions import replay
  13. from .comments import CommentsDownloader
  14. from .logger import log
  15. from .logger import seperator
  16. def main(instagram_api_arg, download_arg, settings_arg):
  17. global instagram_api
  18. global user_to_download
  19. global broadcast
  20. global settings
  21. settings = settings_arg
  22. instagram_api = instagram_api_arg
  23. user_to_download = download_arg
  24. get_user_info(user_to_download)
  25. def run_command(command):
  26. try:
  27. FNULL = open(os.devnull, 'w')
  28. subprocess.Popen(shlex.split(command), stdout=FNULL, stderr=subprocess.STDOUT)
  29. except OSError as e:
  30. pass
  31. def get_stream_duration(compare_time, broadcast=None):
  32. try:
  33. had_wrong_time = False
  34. if broadcast:
  35. if (int(time.time()) < int(compare_time)):
  36. had_wrong_time = True
  37. corrected_compare_time = int(compare_time) - 5
  38. download_time = int(time.time()) - int(corrected_compare_time)
  39. else:
  40. download_time = int(time.time()) - int(compare_time)
  41. stream_time = int(time.time()) - int(broadcast.get('published_time'))
  42. stream_started_mins, stream_started_secs = divmod(stream_time - download_time, 60)
  43. else:
  44. if (int(time.time()) < int(compare_time)):
  45. had_wrong_time = True
  46. corrected_compare_time = int(compare_time) - 5
  47. stream_started_mins, stream_started_secs = divmod((int(time.time()) - int(corrected_compare_time)), 60)
  48. else:
  49. stream_started_mins, stream_started_secs = divmod((int(time.time()) - int(compare_time)), 60)
  50. stream_duration_str = '%d minutes' % stream_started_mins
  51. if stream_started_secs:
  52. stream_duration_str += ' and %d seconds' % stream_started_secs
  53. if had_wrong_time:
  54. return "{:s} (corrected)".format(stream_duration_str)
  55. else:
  56. return stream_duration_str
  57. except Exception as e:
  58. return "Not available"
  59. def download_livestream(broadcast):
  60. try:
  61. def print_status(sep=True):
  62. heartbeat_info = instagram_api.broadcast_heartbeat_and_viewercount(broadcast.get('id'))
  63. viewers = broadcast.get('viewer_count', 0)
  64. if sep:
  65. seperator("GREEN")
  66. log('[I] Viewers : {:s} watching'.format(str(int(viewers))), "GREEN")
  67. log('[I] Airing time : {:s}'.format(get_stream_duration(broadcast.get('published_time'))), "GREEN")
  68. log('[I] Status : {:s}'.format(heartbeat_info.get('broadcast_status').title()), "GREEN")
  69. return heartbeat_info.get('broadcast_status') not in ['active', 'interrupted']
  70. mpd_url = (broadcast.get('dash_manifest')
  71. or broadcast.get('dash_abr_playback_url')
  72. or broadcast.get('dash_playback_url'))
  73. output_dir = '{}{}_{}_{}_{}_live_downloads'.format(settings.save_path, settings.current_date, user_to_download, broadcast.get('id'), settings.current_time)
  74. broadcast_downloader = live.Downloader(
  75. mpd=mpd_url,
  76. output_dir=output_dir,
  77. user_agent=instagram_api.user_agent,
  78. max_connection_error_retry=3,
  79. duplicate_etag_retry=30,
  80. callback_check=print_status,
  81. mpd_download_timeout=3,
  82. download_timeout=3)
  83. except Exception as e:
  84. log('[E] Could not start downloading livestream: {:s}'.format(str(e)), "RED")
  85. seperator("GREEN")
  86. sys.exit(1)
  87. try:
  88. log('[I] Livestream found, beginning download...', "GREEN")
  89. broadcast_owner = broadcast.get('broadcast_owner', {}).get('username')
  90. try:
  91. broadcast_guest = broadcast.get('cobroadcasters', {})[0].get('username')
  92. except:
  93. broadcast_guest = None
  94. if (broadcast_owner != user_to_download):
  95. log('[I] This livestream is a dual-live, the owner is "{}".'.format(broadcast_owner), "BLUE")
  96. broadcast_guest = None
  97. if broadcast_guest:
  98. log('[I] This livestream is a dual-live, the current guest is "{}".'.format(broadcast_guest), "BLUE")
  99. seperator("GREEN")
  100. log('[I] Username : {:s}'.format(user_to_download), "GREEN")
  101. print_status(False)
  102. log('[I] MPD URL : {:s}'.format(mpd_url), "GREEN")
  103. seperator("GREEN")
  104. open(os.path.join(output_dir,'folder.lock'), 'a').close()
  105. log('[I] Downloading livestream... press [CTRL+C] to abort.', "GREEN")
  106. if (settings.run_at_start is not "None"):
  107. try:
  108. thread = threading.Thread(target=run_command, args=(settings.run_at_start,))
  109. thread.daemon = True
  110. thread.start()
  111. log("[I] Command executed: \033[94m{:s}".format(settings.run_at_start), "GREEN")
  112. except Exception as e:
  113. log('[W] Could not execute command: {:s}'.format(str(e)), "YELLOW")
  114. comment_thread_worker = None
  115. if settings.save_comments.title() == "True":
  116. try:
  117. comments_json_file = os.path.join(output_dir, '{}_{}_{}_{}_live_comments.json'.format(settings.current_date, user_to_download, broadcast.get('id'), settings.current_time))
  118. comment_thread_worker = threading.Thread(target=get_live_comments, args=(instagram_api, broadcast, comments_json_file, broadcast_downloader,))
  119. comment_thread_worker.start()
  120. except Exception as e:
  121. log('[E] An error occurred while checking comments: {:s}'.format(str(e)), "RED")
  122. broadcast_downloader.run()
  123. seperator("GREEN")
  124. log('[I] The livestream has ended.\n[I] Download duration : {}\n[I] Stream duration : {}\n[I] Missing (approx.) : {}'.format(get_stream_duration(int(settings.current_time)), get_stream_duration(broadcast.get('published_time')), get_stream_duration(int(settings.current_time), broadcast)), "YELLOW")
  125. seperator("GREEN")
  126. stitch_video(broadcast_downloader, broadcast, comment_thread_worker)
  127. except KeyboardInterrupt:
  128. seperator("GREEN")
  129. log('[I] The download has been aborted by the user.\n[I] Download duration : {}\n[I] Stream duration : {}\n[I] Missing (approx.) : {}'.format(get_stream_duration(int(settings.current_time)), get_stream_duration(broadcast.get('published_time')), get_stream_duration(int(settings.current_time), broadcast)), "YELLOW")
  130. seperator("GREEN")
  131. if not broadcast_downloader.is_aborted:
  132. broadcast_downloader.stop()
  133. stitch_video(broadcast_downloader, broadcast, comment_thread_worker)
  134. except Exception as e:
  135. log("[E] Could not download livestream: {:s}".format(str(e)), "RED")
  136. try:
  137. os.remove(os.path.join(output_dir, 'folder.lock'))
  138. except OSError:
  139. pass
  140. def stitch_video(broadcast_downloader, broadcast, comment_thread_worker):
  141. try:
  142. live_mp4_file = '{}{}_{}_{}_{}_live.mp4'.format(settings.save_path, settings.current_date, user_to_download, broadcast.get('id'), settings.current_time)
  143. live_folder_path = "{:s}_downloads".format(live_mp4_file.split('.mp4')[0])
  144. if comment_thread_worker and comment_thread_worker.is_alive():
  145. log("[I] Waiting for comment downloader to end download cycle...", "GREEN")
  146. comment_thread_worker.join()
  147. if (settings.run_at_finish is not "None"):
  148. try:
  149. thread = threading.Thread(target=run_command, args=(settings.run_at_finish,))
  150. thread.daemon = True
  151. thread.start()
  152. log("[I] Command executed: \033[94m{:s}".format(settings.run_at_finish), "GREEN")
  153. except Exception as e:
  154. log('[W] Could not execute command: {:s}'.format(str(e)), "YELLOW")
  155. log('[I] Stitching downloaded files into video...', "GREEN")
  156. try:
  157. if settings.clear_temp_files.title() == "True":
  158. broadcast_downloader.stitch(live_mp4_file, cleartempfiles=True)
  159. else:
  160. broadcast_downloader.stitch(live_mp4_file, cleartempfiles=False)
  161. log('[I] Successfully stitched downloaded files into video.', "GREEN")
  162. try:
  163. os.remove(os.path.join(live_folder_path,'folder.lock'))
  164. except OSError:
  165. pass
  166. if settings.clear_temp_files.title() == "True":
  167. try:
  168. shutil.rmtree(live_folder_path)
  169. except Exception as e:
  170. log("[E] Could not remove temp folder: {:s}".format(str(e)), "RED")
  171. seperator("GREEN")
  172. sys.exit(0)
  173. except ValueError as e:
  174. log('[E] Could not stitch downloaded files: {:s}\n[E] Likely the download duration was too short and no temp files were saved.'.format(str(e)), "RED")
  175. seperator("GREEN")
  176. try:
  177. os.remove(os.path.join(live_folder_path,'folder.lock'))
  178. except OSError:
  179. pass
  180. sys.exit(1)
  181. except Exception as e:
  182. log('[E] Could not stitch downloaded files: {:s}'.format(str(e)), "RED")
  183. seperator("GREEN")
  184. try:
  185. os.remove(os.path.join(live_folder_path,'folder.lock'))
  186. except OSError:
  187. pass
  188. sys.exit(1)
  189. except KeyboardInterrupt:
  190. log('[I] Aborted stitching process, no video was created.', "YELLOW")
  191. seperator("GREEN")
  192. try:
  193. os.remove(os.path.join(live_folder_path,'folder.lock'))
  194. except OSError:
  195. pass
  196. sys.exit(0)
  197. def get_user_info(user_to_download):
  198. try:
  199. user_res = instagram_api.username_info(user_to_download)
  200. user_id = user_res.get('user', {}).get('pk')
  201. except ClientConnectionError as e:
  202. if "timed out" in str(e):
  203. log('[E] Could not get information for "{:s}": The connection has timed out.'.format(user_to_download), "RED")
  204. else:
  205. log('[E] Could not get information for "{:s}".\n[E] Error message: {:s}\n[E] Code: {:d}\n[E] Response: {:s}'.format(user_to_download, str(e), e.code, e.error_response), "RED")
  206. seperator("GREEN")
  207. sys.exit(1)
  208. except Exception as e:
  209. log('[E] Could not get information for "{:s}".\n[E] Error message: {:s}\n[E] Code: {:d}\n[E] Response: {:s}'.format(user_to_download, str(e), e.code, e.error_response), "RED")
  210. seperator("GREEN")
  211. sys.exit(1)
  212. except KeyboardInterrupt:
  213. log('[W] Aborted getting information for "{:s}", exiting...'.format(user_to_download), "YELLOW")
  214. seperator("GREEN")
  215. sys.exit(1)
  216. log('[I] Getting info for "{:s}" successful.'.format(user_to_download), "GREEN")
  217. get_broadcasts_info(user_id)
  218. def get_broadcasts_info(user_id):
  219. try:
  220. log('[I] Checking for livestreams and replays...', "GREEN")
  221. seperator("GREEN")
  222. broadcasts = instagram_api.user_story_feed(user_id)
  223. livestream = broadcasts.get('broadcast')
  224. replays = broadcasts.get('post_live_item', {}).get('broadcasts', [])
  225. if livestream:
  226. download_livestream(livestream)
  227. else:
  228. log('[I] There are no available livestreams.', "YELLOW")
  229. if settings.save_replays.title() == "True":
  230. if replays:
  231. seperator("GREEN")
  232. download_replays(replays)
  233. else:
  234. log('[I] There are no available replays.', "YELLOW")
  235. else:
  236. log("[I] Replay saving is disabled either with a flag or in the config file.", "BLUE")
  237. seperator("GREEN")
  238. except Exception as e:
  239. log('[E] Could not finish checking: {:s}'.format(str(e)), "RED")
  240. except ClientThrottledError as cte:
  241. log('[E] Could not check because you are making too many requests at this time.', "RED")
  242. log('[E] Error response: {:s}'.format(str(cte)), "RED")
  243. def download_replays(broadcasts):
  244. try:
  245. log("[I] Downloading replays... press [CTRL+C] to abort.", "GREEN")
  246. seperator("GREEN")
  247. for replay_index, broadcast in enumerate(broadcasts):
  248. exists = False
  249. if sys.version.split(' ')[0].startswith('2'):
  250. directories = (os.walk(settings.save_path).next()[1])
  251. else:
  252. directories = (os.walk(settings.save_path).__next__()[1])
  253. for directory in directories:
  254. if (str(broadcast.get('id')) in directory) and ("_live_" not in directory):
  255. log("[W] Already downloaded a replay with ID '{:s}'.".format(str(broadcast.get('id'))), "YELLOW")
  256. exists = True
  257. if not exists:
  258. current = replay_index + 1
  259. log("[I] Downloading replay {:s} of {:s} with ID '{:s}'...".format(str(current), str(len(broadcasts)), str(broadcast.get('id'))), "GREEN")
  260. current_time = str(int(time.time()))
  261. output_dir = '{}{}_{}_{}_{}_replay_downloads'.format(settings.save_path, settings.current_date, user_to_download, broadcast.get('id'), settings.current_time)
  262. broadcast_downloader = replay.Downloader(
  263. mpd=broadcast.get('dash_manifest'),
  264. output_dir=output_dir,
  265. user_agent=instagram_api.user_agent)
  266. open(os.path.join(output_dir,'folder.lock'), 'a').close()
  267. replay_mp4_file = '{}{}_{}_{}_{}_replay.mp4'.format(settings.save_path, settings.current_date, user_to_download, broadcast.get('id'), settings.current_time)
  268. replay_json_file = os.path.join(output_dir, '{}_{}_{}_{}_replay_comments.json'.format(settings.current_date, user_to_download, broadcast.get('id'), settings.current_time))
  269. if settings.clear_temp_files.title() == "True":
  270. replay_saved = broadcast_downloader.download(replay_mp4_file, cleartempfiles=True)
  271. else:
  272. replay_saved = broadcast_downloader.download(replay_mp4_file, cleartempfiles=False)
  273. if settings.save_comments.title() == "True":
  274. log("[I] Checking for available comments to save...", "GREEN")
  275. try:
  276. get_replay_comments(instagram_api, broadcast, replay_json_file, broadcast_downloader)
  277. except Exception as e:
  278. log('[E] An error occurred while checking comments: {:s}'.format(str(e)), "RED")
  279. if (len(replay_saved) == 1):
  280. log("[I] Finished downloading replay {:s} of {:s}.".format(str(current), str(len(broadcasts))), "GREEN")
  281. try:
  282. os.remove(os.path.join(output_dir,'folder.lock'))
  283. except OSError:
  284. pass
  285. if (current != len(broadcasts)):
  286. seperator("GREEN")
  287. else:
  288. log("[W] No output video file was made, please merge the files manually if possible.", "YELLOW")
  289. log("[W] Check if ffmpeg is available by running ffmpeg in your terminal/cmd prompt.", "YELLOW")
  290. log("", "GREEN")
  291. seperator("GREEN")
  292. log("[I] Finished downloading all available replays.", "GREEN")
  293. seperator("GREEN")
  294. sys.exit(0)
  295. except Exception as e:
  296. log('[E] Could not save replay: {:s}'.format(str(e)), "RED")
  297. seperator("GREEN")
  298. try:
  299. os.remove(os.path.join(output_dir,'folder.lock'))
  300. except OSError:
  301. pass
  302. sys.exit(1)
  303. except KeyboardInterrupt:
  304. seperator("GREEN")
  305. log('[I] The download has been aborted by the user.', "YELLOW")
  306. seperator("GREEN")
  307. try:
  308. shutil.rmtree(output_dir)
  309. except Exception as e:
  310. log("[E] Could not remove temp folder: {:s}".format(str(e)), "RED")
  311. sys.exit(1)
  312. sys.exit(0)
  313. def get_replay_comments(instagram_api, broadcast, comments_json_file, broadcast_downloader):
  314. try:
  315. comments_downloader = CommentsDownloader(
  316. api=instagram_api, broadcast=broadcast, destination_file=comments_json_file)
  317. comments_downloader.get_replay()
  318. try:
  319. if comments_downloader.comments:
  320. comments_log_file = comments_json_file.replace('.json', '.log')
  321. comment_errors, total_comments = CommentsDownloader.generate_log(
  322. comments_downloader.comments, broadcast.get('published_time'), comments_log_file,
  323. comments_delay=0)
  324. if total_comments == 1:
  325. log("[I] Successfully saved 1 comment to logfile.", "GREEN")
  326. seperator("GREEN")
  327. return True
  328. else:
  329. if comment_errors:
  330. log("[W] Successfully saved {:s} comments to logfile but {:s} comments are (partially) missing.".format(str(total_comments), str(comment_errors)), "YELLOW")
  331. else:
  332. log("[I] Successfully saved {:s} comments to logfile.".format(str(total_comments)), "GREEN")
  333. seperator("GREEN")
  334. return True
  335. else:
  336. log("[I] There are no available comments to save.", "GREEN")
  337. return False
  338. except Exception as e:
  339. log('[E] Could not save comments to logfile: {:s}'.format(str(e)), "RED")
  340. return False
  341. except KeyboardInterrupt as e:
  342. log("[W] Downloading replay comments has been aborted.", "YELLOW")
  343. return False
  344. def get_live_comments(instagram_api, broadcast, comments_json_file, broadcast_downloader):
  345. try:
  346. comments_downloader = CommentsDownloader(
  347. api=instagram_api, broadcast=broadcast, destination_file=comments_json_file)
  348. first_comment_created_at = 0
  349. try:
  350. while not broadcast_downloader.is_aborted:
  351. if 'initial_buffered_duration' not in broadcast and broadcast_downloader.initial_buffered_duration:
  352. broadcast['initial_buffered_duration'] = broadcast_downloader.initial_buffered_duration
  353. comments_downloader.broadcast = broadcast
  354. first_comment_created_at = comments_downloader.get_live(first_comment_created_at)
  355. except ClientError as e:
  356. if not 'media has been deleted' in e.error_response:
  357. log("[W] Comment collection ClientError: %d %s" % (e.code, e.error_response), "YELLOW")
  358. try:
  359. if comments_downloader.comments:
  360. comments_downloader.save()
  361. comments_log_file = comments_json_file.replace('.json', '.log')
  362. comment_errors, total_comments = CommentsDownloader.generate_log(
  363. comments_downloader.comments, settings.current_time, comments_log_file,
  364. comments_delay=broadcast_downloader.initial_buffered_duration)
  365. if len(comments_downloader.comments) == 1:
  366. log("[I] Successfully saved 1 comment to logfile.", "GREEN")
  367. seperator("GREEN")
  368. return True
  369. else:
  370. if comment_errors:
  371. log("[W] Successfully saved {:s} comments to logfile but {:s} comments are (partially) missing.".format(str(total_comments), str(comment_errors)), "YELLOW")
  372. else:
  373. log("[I] Successfully saved {:s} comments to logfile.".format(str(total_comments)), "GREEN")
  374. seperator("GREEN")
  375. return True
  376. else:
  377. log("[I] There are no available comments to save.", "GREEN")
  378. return False
  379. seperator("GREEN")
  380. except Exception as e:
  381. log('[E] Could not save comments to logfile: {:s}'.format(str(e)), "RED")
  382. return False
  383. except KeyboardInterrupt as e:
  384. log("[W] Downloading livestream comments has been aborted.", "YELLOW")
  385. return False