# downloader.py
import json
import os
import shlex
import shutil
import subprocess
import sys
import threading
import time
from xml.dom.minidom import parse, parseString

from instagram_private_api import ClientConnectionError
from instagram_private_api import ClientError
from instagram_private_api import ClientThrottledError
from instagram_private_api_extensions import live
from instagram_private_api_extensions import replay

from .comments import CommentsDownloader
from .logger import log_seperator, supports_color, log_info_blue, log_info_green, log_warn, log_error, log_whiteline, log_plain
  17. def start_single(instagram_api_arg, download_arg, settings_arg):
  18. global instagram_api
  19. global user_to_download
  20. global broadcast
  21. global settings
  22. settings = settings_arg
  23. instagram_api = instagram_api_arg
  24. user_to_download = download_arg
  25. get_user_info(user_to_download)
  26. def start_multiple(instagram_api_arg, settings_arg, proc_arg):
  27. try:
  28. log_info_green("Checking following users for any livestreams or replays...")
  29. broadcast_f_list = instagram_api_arg.reels_tray()
  30. usernames_available = []
  31. if broadcast_f_list['broadcasts']:
  32. for broadcast_f in broadcast_f_list['broadcasts']:
  33. username = broadcast_f['broadcast_owner']['username']
  34. if username not in usernames_available:
  35. usernames_available.append(username)
  36. if broadcast_f_list.get('post_live', {}).get('post_live_items', []):
  37. for broadcast_r in broadcast_f_list.get('post_live', {}).get('post_live_items', []):
  38. for broadcast_f in broadcast_r.get("broadcasts", []):
  39. username = broadcast_f['broadcast_owner']['username']
  40. if username not in usernames_available:
  41. usernames_available.append(username)
  42. log_seperator()
  43. if usernames_available:
  44. log_info_green("The following users have available livestreams or replays:")
  45. log_info_green(', '.join(usernames_available))
  46. log_seperator()
  47. for index, user in enumerate(usernames_available):
  48. try:
  49. log_info_green("Launching daemon process for '{:s}'...".format(user))
  50. start_result = run_command("{:s} -d {:s}".format(proc_arg, user))
  51. if start_result:
  52. log_info_green("Could not start processs: {:s}".format(str(start_result)))
  53. else:
  54. log_info_green("Process started successfully.")
  55. log_seperator()
  56. time.sleep(2)
  57. except Exception as e:
  58. log_error("Could not start processs: {:s}".format(str(e)))
  59. except KeyboardInterrupt:
  60. log_info_blue('The process launching has been aborted by the user.')
  61. log_seperator()
  62. exit(0)
  63. except Exception as e:
  64. log_error("Could not finish checking following users: {:s}".format(str(e)))
  65. exit(1)
  66. except KeyboardInterrupt:
  67. log_seperator()
  68. log_info_blue('The checking process has been aborted by the user.')
  69. log_seperator()
  70. exit(0)
  71. #open("reels.json", "w").write(json.dumps(following_broadcasts))
  72. def run_command(command):
  73. try:
  74. FNULL = open(os.devnull, 'w')
  75. subprocess.Popen(shlex.split(command), stdout=FNULL, stderr=subprocess.STDOUT)
  76. return False
  77. except Exception as e:
  78. return str(e)
  79. def get_stream_duration(compare_time, broadcast=None):
  80. try:
  81. had_wrong_time = False
  82. if broadcast:
  83. if (int(time.time()) < int(compare_time)):
  84. had_wrong_time = True
  85. corrected_compare_time = int(compare_time) - 5
  86. download_time = int(time.time()) - int(corrected_compare_time)
  87. else:
  88. download_time = int(time.time()) - int(compare_time)
  89. stream_time = int(time.time()) - int(broadcast.get('published_time'))
  90. stream_started_mins, stream_started_secs = divmod(stream_time - download_time, 60)
  91. else:
  92. if (int(time.time()) < int(compare_time)):
  93. had_wrong_time = True
  94. corrected_compare_time = int(compare_time) - 5
  95. stream_started_mins, stream_started_secs = divmod((int(time.time()) - int(corrected_compare_time)), 60)
  96. else:
  97. stream_started_mins, stream_started_secs = divmod((int(time.time()) - int(compare_time)), 60)
  98. stream_duration_str = '%d minutes' % stream_started_mins
  99. if stream_started_secs:
  100. stream_duration_str += ' and %d seconds' % stream_started_secs
  101. if had_wrong_time:
  102. return "{:s} (corrected)".format(stream_duration_str)
  103. else:
  104. return stream_duration_str
  105. except Exception as e:
  106. return "Not available"
  107. def download_livestream(broadcast):
  108. try:
  109. def print_status(sep=True):
  110. heartbeat_info = instagram_api.broadcast_heartbeat_and_viewercount(broadcast.get('id'))
  111. viewers = broadcast.get('viewer_count', 0)
  112. if sep:
  113. log_seperator()
  114. log_info_green('Viewers : {:s} watching'.format(str(int(viewers))))
  115. log_info_green('Airing time : {:s}'.format(get_stream_duration(broadcast.get('published_time'))))
  116. log_info_green('Status : {:s}'.format(heartbeat_info.get('broadcast_status').title()))
  117. return heartbeat_info.get('broadcast_status') not in ['active', 'interrupted']
  118. mpd_url = (broadcast.get('dash_manifest')
  119. or broadcast.get('dash_abr_playback_url')
  120. or broadcast.get('dash_playback_url'))
  121. output_dir = '{}{}_{}_{}_{}_live_downloads'.format(settings.save_path, settings.current_date, user_to_download, broadcast.get('id'), settings.current_time)
  122. broadcast_downloader = live.Downloader(
  123. mpd=mpd_url,
  124. output_dir=output_dir,
  125. user_agent=instagram_api.user_agent,
  126. max_connection_error_retry=3,
  127. duplicate_etag_retry=30,
  128. callback_check=print_status,
  129. mpd_download_timeout=3,
  130. download_timeout=3)
  131. except Exception as e:
  132. log_error('Could not start downloading livestream: {:s}'.format(str(e)))
  133. log_seperator()
  134. sys.exit(1)
  135. try:
  136. log_info_green('Livestream found, beginning download...')
  137. broadcast_owner = broadcast.get('broadcast_owner', {}).get('username')
  138. try:
  139. broadcast_guest = broadcast.get('cobroadcasters', {})[0].get('username')
  140. except:
  141. broadcast_guest = None
  142. if (broadcast_owner != user_to_download):
  143. log_info_blue('This livestream is a dual-live, the owner is "{}".'.format(broadcast_owner))
  144. broadcast_guest = None
  145. if broadcast_guest:
  146. log_info_blue('This livestream is a dual-live, the current guest is "{}".'.format(broadcast_guest))
  147. log_seperator()
  148. log_info_green('Username : {:s}'.format(user_to_download))
  149. print_status(False)
  150. log_info_green('MPD URL : {:s}'.format(mpd_url))
  151. log_seperator()
  152. open(os.path.join(output_dir, 'folder.lock'), 'a').close()
  153. log_info_green('Downloading livestream... press [CTRL+C] to abort.')
  154. if (settings.run_at_start is not "None"):
  155. try:
  156. thread = threading.Thread(target=run_command, args=(settings.run_at_start,))
  157. thread.daemon = True
  158. thread.start()
  159. log_info_green("Command executed: \033[94m{:s}".format(settings.run_at_start))
  160. except Exception as e:
  161. log_warn('Could not execute command: {:s}'.format(str(e)))
  162. comment_thread_worker = None
  163. if settings.save_comments.title() == "True":
  164. try:
  165. comments_json_file = os.path.join(output_dir, '{}_{}_{}_{}_live_comments.json'.format(settings.current_date, user_to_download, broadcast.get('id'), settings.current_time))
  166. comment_thread_worker = threading.Thread(target=get_live_comments, args=(instagram_api, broadcast, comments_json_file, broadcast_downloader,))
  167. comment_thread_worker.start()
  168. except Exception as e:
  169. log_error('An error occurred while downloading comments: {:s}'.format(str(e)))
  170. broadcast_downloader.run()
  171. log_seperator()
  172. log_info_green('Download duration : {}'.format(get_stream_duration(int(settings.current_time))))
  173. log_info_green('Stream duration : {}'.format(get_stream_duration(broadcast.get('published_time'))))
  174. log_info_green('Missing (approx.) : {}'.format(get_stream_duration(int(settings.current_time), broadcast)))
  175. log_seperator()
  176. stitch_video(broadcast_downloader, broadcast, comment_thread_worker)
  177. except KeyboardInterrupt:
  178. log_seperator()
  179. log_info_blue('The download has been aborted by the user.')
  180. log_seperator()
  181. log_info_green('Download duration : {}'.format(get_stream_duration(int(settings.current_time))))
  182. log_info_green('Stream duration : {}'.format(get_stream_duration(broadcast.get('published_time'))))
  183. log_info_green('Missing (approx.) : {}'.format(get_stream_duration(int(settings.current_time), broadcast)))
  184. log_seperator()
  185. if not broadcast_downloader.is_aborted:
  186. broadcast_downloader.stop()
  187. stitch_video(broadcast_downloader, broadcast, comment_thread_worker)
  188. except Exception as e:
  189. log_error("Could not download livestream: {:s}".format(str(e)))
  190. try:
  191. os.remove(os.path.join(output_dir, 'folder.lock'))
  192. except Exception:
  193. pass
  194. def stitch_video(broadcast_downloader, broadcast, comment_thread_worker):
  195. try:
  196. live_mp4_file = '{}{}_{}_{}_{}_live.mp4'.format(settings.save_path, settings.current_date, user_to_download, broadcast.get('id'), settings.current_time)
  197. live_folder_path = "{:s}_downloads".format(live_mp4_file.split('.mp4')[0])
  198. if comment_thread_worker and comment_thread_worker.is_alive():
  199. log_info_green("Waiting for comment downloader to end cycle...")
  200. comment_thread_worker.join()
  201. if (settings.run_at_finish is not "None"):
  202. try:
  203. thread = threading.Thread(target=run_command, args=(settings.run_at_finish,))
  204. thread.daemon = True
  205. thread.start()
  206. log_info_green("Command executed: \033[94m{:s}".format(settings.run_at_finish))
  207. except Exception as e:
  208. log_warn('Could not execute command: {:s}'.format(str(e)))
  209. log_info_green('Stitching downloaded files into video...')
  210. try:
  211. if settings.clear_temp_files.title() == "True":
  212. broadcast_downloader.stitch(live_mp4_file, cleartempfiles=True)
  213. else:
  214. broadcast_downloader.stitch(live_mp4_file, cleartempfiles=False)
  215. log_info_green('Successfully stitched downloaded files into video.')
  216. try:
  217. os.remove(os.path.join(live_folder_path, 'folder.lock'))
  218. except Exception:
  219. pass
  220. if settings.clear_temp_files.title() == "True":
  221. try:
  222. shutil.rmtree(live_folder_path)
  223. except Exception as e:
  224. log_error("Could not remove temp folder: {:s}".format(str(e)))
  225. log_seperator()
  226. sys.exit(0)
  227. except ValueError as e:
  228. log_error('Could not stitch downloaded files: {:s}'.format(str(e)))
  229. log_error('Likely the download duration was too short and no temp files were saved.')
  230. log_seperator()
  231. try:
  232. os.remove(os.path.join(live_folder_path, 'folder.lock'))
  233. except Exception:
  234. pass
  235. sys.exit(1)
  236. except Exception as e:
  237. log_error('Could not stitch downloaded files: {:s}'.format(str(e)))
  238. log_seperator()
  239. try:
  240. os.remove(os.path.join(live_folder_path, 'folder.lock'))
  241. except Exception:
  242. pass
  243. sys.exit(1)
  244. except KeyboardInterrupt:
  245. log_info_blue('Aborted stitching process, no video was created.')
  246. log_seperator()
  247. try:
  248. os.remove(os.path.join(live_folder_path, 'folder.lock'))
  249. except Exception:
  250. pass
  251. sys.exit(0)
  252. def get_user_info(user_to_download):
  253. try:
  254. user_res = instagram_api.username_info(user_to_download)
  255. user_id = user_res.get('user', {}).get('pk')
  256. except ClientConnectionError as cce:
  257. log_error('Could not get user info for "{:s}": {:d} {:s}'.format(user_to_download, cce.code, str(cce)))
  258. if "getaddrinfo failed" in str(cce):
  259. log_error('Could not resolve host, check your internet connection.')
  260. if "timed out" in str(cce):
  261. log_error('The connection timed out, check your internet connection.')
  262. log_seperator()
  263. sys.exit(1)
  264. except ClientThrottledError as cte:
  265. log_error('Could not get user info for "{:s}": {:d} {:s}.'.format(user_to_download, cte.code, str(cte)))
  266. log_error('You are making too many requests at this time.')
  267. log_seperator()
  268. sys.exit(1)
  269. except ClientError as ce:
  270. log_error('Could not get user info for "{:s}": {:d} {:s}'.format(user_to_download, ce.code, str(ce)))
  271. if ("Not Found") in str(ce):
  272. log_error('The specified user does not exist.')
  273. log_seperator()
  274. sys.exit(1)
  275. except Exception as e:
  276. log_error('Could not get user info for "{:s}": {:s}'.format(user_to_download, str(e)))
  277. log_seperator()
  278. sys.exit(1)
  279. except KeyboardInterrupt:
  280. log_info_blue('Aborted getting user info for "{:s}", exiting...'.format(user_to_download))
  281. log_seperator()
  282. sys.exit(0)
  283. log_info_green('Getting info for "{:s}" successful.'.format(user_to_download))
  284. get_broadcasts_info(user_id)
  285. def get_broadcasts_info(user_id):
  286. try:
  287. log_seperator()
  288. log_info_green('Checking for livestreams and replays...')
  289. log_seperator()
  290. broadcasts = instagram_api.user_story_feed(user_id)
  291. livestream = broadcasts.get('broadcast')
  292. replays = broadcasts.get('post_live_item', {}).get('broadcasts', [])
  293. if settings.save_lives.title() == "True":
  294. if livestream:
  295. download_livestream(livestream)
  296. else:
  297. log_info_green('There are no available livestreams.')
  298. else:
  299. log_info_blue("Livestream saving is disabled either with an argument or in the config file.")
  300. if settings.save_replays.title() == "True":
  301. if replays:
  302. log_seperator()
  303. log_info_green('Replays found, beginning download...')
  304. log_seperator()
  305. download_replays(replays)
  306. else:
  307. log_info_green('There are no available replays.')
  308. else:
  309. log_seperator()
  310. log_info_blue("Replay saving is disabled either with an argument or in the config file.")
  311. log_seperator()
  312. except Exception as e:
  313. log_error('Could not finish checking: {:s}'.format(str(e)))
  314. if "timed out" in str(e):
  315. log_error('The connection timed out, check your internet connection.')
  316. log_seperator()
  317. exit(1)
  318. except KeyboardInterrupt:
  319. log_info_blue('Aborted checking for livestreams and replays, exiting...'.format(user_to_download))
  320. log_seperator()
  321. sys.exit(1)
  322. except ClientThrottledError as cte:
  323. log_error('Could not check because you are making too many requests at this time.')
  324. log_seperator()
  325. exit(1)
  326. def download_replays(broadcasts):
  327. try:
  328. try:
  329. log_info_green('Amount of replays : {:s}'.format(str(len(broadcasts))))
  330. for replay_index, broadcast in enumerate(broadcasts):
  331. bc_dash_manifest = parseString(broadcast.get('dash_manifest')).getElementsByTagName('Period')
  332. bc_duration_raw = bc_dash_manifest[0].getAttribute("duration")
  333. bc_hours = (bc_duration_raw.split("PT"))[1].split("H")[0]
  334. bc_minutes = (bc_duration_raw.split("H"))[1].split("M")[0]
  335. bc_seconds = ((bc_duration_raw.split("M"))[1].split("S")[0]).split('.')[0]
  336. log_info_green('Replay {:s} duration : {:s} minutes and {:s} seconds'.format(str(replay_index + 1), bc_minutes, bc_seconds))
  337. except Exception as e:
  338. log_warn("An error occurred while getting replay duration information: {:s}".format(str(e)))
  339. log_seperator()
  340. log_info_green("Downloading replays... press [CTRL+C] to abort.")
  341. log_seperator()
  342. for replay_index, broadcast in enumerate(broadcasts):
  343. exists = False
  344. if sys.version.split(' ')[0].startswith('2'):
  345. directories = (os.walk(settings.save_path).next()[1])
  346. else:
  347. directories = (os.walk(settings.save_path).__next__()[1])
  348. for directory in directories:
  349. if (str(broadcast.get('id')) in directory) and ("_live_" not in directory):
  350. log_info_blue("Already downloaded a replay with ID '{:s}'.".format(str(broadcast.get('id'))))
  351. exists = True
  352. if not exists:
  353. current = replay_index + 1
  354. log_info_green("Downloading replay {:s} of {:s} with ID '{:s}'...".format(str(current), str(len(broadcasts)), str(broadcast.get('id'))))
  355. current_time = str(int(time.time()))
  356. output_dir = '{}{}_{}_{}_{}_replay_downloads'.format(settings.save_path, settings.current_date, user_to_download, broadcast.get('id'), settings.current_time)
  357. broadcast_downloader = replay.Downloader(
  358. mpd=broadcast.get('dash_manifest'),
  359. output_dir=output_dir,
  360. user_agent=instagram_api.user_agent)
  361. open(os.path.join(output_dir, 'folder.lock'), 'a').close()
  362. replay_mp4_file = '{}{}_{}_{}_{}_replay.mp4'.format(settings.save_path, settings.current_date, user_to_download, broadcast.get('id'), settings.current_time)
  363. replay_json_file = os.path.join(output_dir, '{}_{}_{}_{}_replay_comments.json'.format(settings.current_date, user_to_download, broadcast.get('id'), settings.current_time))
  364. if settings.clear_temp_files.title() == "True":
  365. replay_saved = broadcast_downloader.download(replay_mp4_file, cleartempfiles=True)
  366. else:
  367. replay_saved = broadcast_downloader.download(replay_mp4_file, cleartempfiles=False)
  368. if settings.save_comments.title() == "True":
  369. log_info_green("Downloading replay comments...")
  370. try:
  371. get_replay_comments(instagram_api, broadcast, replay_json_file, broadcast_downloader)
  372. except Exception as e:
  373. log_error('An error occurred while downloading comments: {:s}'.format(str(e)))
  374. if (len(replay_saved) == 1):
  375. log_info_green("Finished downloading replay {:s} of {:s}.".format(str(current), str(len(broadcasts))))
  376. try:
  377. os.remove(os.path.join(output_dir, 'folder.lock'))
  378. except Exception:
  379. pass
  380. if (current != len(broadcasts)):
  381. log_seperator()
  382. else:
  383. log_warn("No output video file was made, please merge the files manually if possible.")
  384. log_warn("Check if ffmpeg is available by running ffmpeg in your terminal/cmd prompt.")
  385. log_whiteline()
  386. log_seperator()
  387. log_info_green("Finished downloading all available replays.")
  388. log_seperator()
  389. sys.exit(0)
  390. except Exception as e:
  391. log_error('Could not save replay: {:s}'.format(str(e)))
  392. log_seperator()
  393. try:
  394. os.remove(os.path.join(output_dir, 'folder.lock'))
  395. except Exception:
  396. pass
  397. sys.exit(1)
  398. except KeyboardInterrupt:
  399. log_seperator()
  400. log_info_blue('The download has been aborted by the user, exiting...')
  401. log_seperator()
  402. try:
  403. shutil.rmtree(output_dir)
  404. except Exception as e:
  405. log_error("Could not remove temp folder: {:s}".format(str(e)))
  406. sys.exit(1)
  407. sys.exit(0)
  408. def get_replay_comments(instagram_api, broadcast, comments_json_file, broadcast_downloader):
  409. try:
  410. comments_downloader = CommentsDownloader(
  411. api=instagram_api, broadcast=broadcast, destination_file=comments_json_file)
  412. comments_downloader.get_replay()
  413. try:
  414. if comments_downloader.comments:
  415. comments_log_file = comments_json_file.replace('.json', '.log')
  416. comment_errors, total_comments = CommentsDownloader.generate_log(
  417. comments_downloader.comments, broadcast.get('published_time'), comments_log_file,
  418. comments_delay=0)
  419. if total_comments == 1:
  420. log_info_green("Successfully saved 1 comment to logfile.")
  421. log_seperator()
  422. return True
  423. else:
  424. if comment_errors:
  425. log_warn("Successfully saved {:s} comments to logfile but {:s} comments are (partially) missing.".format(str(total_comments), str(comment_errors)))
  426. else:
  427. log_info_green("Successfully saved {:s} comments to logfile.".format(str(total_comments)))
  428. log_seperator()
  429. return True
  430. else:
  431. log_info_green("There are no available comments to save.")
  432. return False
  433. except Exception as e:
  434. log_error('Could not save comments to logfile: {:s}'.format(str(e)))
  435. return False
  436. except KeyboardInterrupt as e:
  437. log_info_blue("Downloading replay comments has been aborted.")
  438. return False
  439. def get_live_comments(instagram_api, broadcast, comments_json_file, broadcast_downloader):
  440. try:
  441. comments_downloader = CommentsDownloader(
  442. api=instagram_api, broadcast=broadcast, destination_file=comments_json_file)
  443. first_comment_created_at = 0
  444. try:
  445. while not broadcast_downloader.is_aborted:
  446. if 'initial_buffered_duration' not in broadcast and broadcast_downloader.initial_buffered_duration:
  447. broadcast['initial_buffered_duration'] = broadcast_downloader.initial_buffered_duration
  448. comments_downloader.broadcast = broadcast
  449. first_comment_created_at = comments_downloader.get_live(first_comment_created_at)
  450. except ClientError as e:
  451. if not 'media has been deleted' in e.error_response:
  452. log_warn("Comment collection ClientError: %d %s" % (e.code, e.error_response))
  453. try:
  454. if comments_downloader.comments:
  455. comments_downloader.save()
  456. comments_log_file = comments_json_file.replace('.json', '.log')
  457. comment_errors, total_comments = CommentsDownloader.generate_log(
  458. comments_downloader.comments, settings.current_time, comments_log_file,
  459. comments_delay=broadcast_downloader.initial_buffered_duration)
  460. if len(comments_downloader.comments) == 1:
  461. log_info_green("Successfully saved 1 comment to logfile.")
  462. log_seperator()
  463. return True
  464. else:
  465. if comment_errors:
  466. log_warn("Successfully saved {:s} comments to logfile but {:s} comments are (partially) missing.".format(str(total_comments), str(comment_errors)))
  467. else:
  468. log_info_green("Successfully saved {:s} comments to logfile.".format(str(total_comments)))
  469. log_seperator()
  470. return True
  471. else:
  472. log_info_green("There are no available comments to save.")
  473. return False
  474. log_seperator()
  475. except Exception as e:
  476. log_error('Could not save comments to logfile: {:s}'.format(str(e)))
  477. return False
  478. except KeyboardInterrupt as e:
  479. log_info_blue("Downloading livestream comments has been aborted.")
  480. return False