dlfuncs.py

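"""Download helpers for PyInstaLive.

Covers resolving the target user, checking for available livestreams and
replays, downloading stream segments and comments, and merging the downloaded
segments into an MP4 file. All functions read shared runtime state (API
client, paths, flags) from the ``pil`` module.
"""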
import os
import shutil
import json
import threading
import time

from xml.dom.minidom import parseString

from instagram_private_api import ClientConnectionError
from instagram_private_api import ClientError
from instagram_private_api import ClientThrottledError
from instagram_private_api_extensions import live
from instagram_private_api_extensions import replay

try:
    import logger
    import helpers
    import pil
    import dlfuncs
    import assembler
    from constants import Constants
    from comments import CommentsDownloader
except ImportError:
    from . import logger
    from . import helpers
    from . import pil
    from . import assembler
    from . import dlfuncs
    from .constants import Constants
    from .comments import CommentsDownloader
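
# Return a human-readable duration string. duration_type selects between the
# broadcast's airtime (0), the download's runtime (1), and the approximate
# portion missed before the download started (2).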
def get_stream_duration(duration_type):
    try:
        # For some reason the published_time is roughly 40 seconds behind real world time
        if duration_type == 0:  # Airtime duration
            stream_started_mins, stream_started_secs = divmod((int(time.time()) - pil.livestream_obj.get("published_time") + 40), 60)
        if duration_type == 1:  # Download duration
            stream_started_mins, stream_started_secs = divmod((int(time.time()) - int(pil.epochtime)), 60)
        if duration_type == 2:  # Missing duration
            if (int(pil.epochtime) - pil.livestream_obj.get("published_time") + 40) <= 0:
                stream_started_mins, stream_started_secs = 0, 0  # Download started 'earlier' than actual broadcast, assume started at the same time instead
            else:
                stream_started_mins, stream_started_secs = divmod((int(pil.epochtime) - pil.livestream_obj.get("published_time") + 40), 60)
        if stream_started_mins < 0:
            stream_started_mins = 0
        if stream_started_secs < 0:
            stream_started_secs = 0
        stream_duration_str = '%d minutes' % stream_started_mins
        if stream_started_secs:
            stream_duration_str += ' and %d seconds' % stream_started_secs
        return stream_duration_str
    except Exception:
        return "Not available"
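
# Resolve pil.dl_user to a numeric user id: accept a raw id directly, otherwise
# look the username up through the API and report any connection, throttling or
# "not found" errors.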
def get_user_id():
    is_user_id = False
    user_id = None
    try:
        user_id = int(pil.dl_user)
        is_user_id = True
    except ValueError:
        try:
            user_res = pil.ig_api.username_info(pil.dl_user)
            if pil.verbose:
                logger.plain(json.dumps(user_res))
            user_id = user_res.get('user', {}).get('pk')
        except ClientConnectionError as cce:
            logger.error(
                "Could not get user info for '{:s}': {:d} {:s}".format(pil.dl_user, cce.code, str(cce)))
            if "getaddrinfo failed" in str(cce):
                logger.error('Could not resolve host, check your internet connection.')
            if "timed out" in str(cce):
                logger.error('The connection timed out, check your internet connection.')
        except ClientThrottledError as cte:
            logger.error(
                "Could not get user info for '{:s}': {:d} {:s}".format(pil.dl_user, cte.code, str(cte)))
        except ClientError as ce:
            logger.error(
                "Could not get user info for '{:s}': {:d} {:s}".format(pil.dl_user, ce.code, str(ce)))
            if "Not Found" in str(ce):
                logger.error('The specified user does not exist.')
        except Exception as e:
            logger.error("Could not get user info for '{:s}': {:s}".format(pil.dl_user, str(e)))
        except KeyboardInterrupt:
            logger.binfo("Aborted getting user info for '{:s}', exiting.".format(pil.dl_user))
    if user_id and is_user_id:
        logger.info("Getting info for '{:s}' successful. Assuming input is a user ID.".format(pil.dl_user))
        logger.separator()
        return user_id
    elif user_id:
        logger.info("Getting info for '{:s}' successful.".format(pil.dl_user))
        logger.separator()
        return user_id
    else:
        return None
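
# Fetch the story feed for the target user and store any ongoing livestream on
# pil.livestream_obj and available replays on pil.replays_obj. Returns True
# when the feed was fetched, False on failure or when the user could not be
# resolved.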
def get_broadcasts_info():
    try:
        user_id = get_user_id()
        if user_id:
            broadcasts = pil.ig_api.user_story_feed(user_id)
            if pil.verbose:
                logger.plain(json.dumps(broadcasts))
            pil.livestream_obj = broadcasts.get('broadcast')
            pil.replays_obj = broadcasts.get('post_live_item', {}).get('broadcasts', [])
            return True
        else:
            return False
    except ClientThrottledError:
        logger.error('Could not check because you are making too many requests at this time.')
        return False
    except Exception as e:
        logger.error('Could not finish checking: {:s}'.format(str(e)))
        if "timed out" in str(e):
            logger.error('The connection timed out, check your internet connection.')
        if "login_required" in str(e):
            logger.error('Login cookie was loaded but user is not actually logged in. Delete the cookie file and try '
                         'again.')
        return False
    except KeyboardInterrupt:
        logger.binfo('Aborted checking for livestreams and replays, exiting.')
        return False
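
# Wait for the helper threads to finish, then stitch the downloaded segments
# into a single MP4. Falls back to the assembler when stitching fails but
# segments are still present on disk.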
def merge_segments():
    try:
        if pil.run_at_finish:
            try:
                thread = threading.Thread(target=helpers.run_command, args=(pil.run_at_finish,))
                thread.daemon = True
                thread.start()
                logger.binfo("Launched finish command: {:s}".format(pil.run_at_finish))
            except Exception as e:
                logger.warn('Could not execute command: {:s}'.format(str(e)))
        live_mp4_file = '{}{}_{}_{}_{}_live.mp4'.format(pil.dl_path, pil.datetime_compat, pil.dl_user,
                                                        pil.livestream_obj.get('id'), pil.epochtime)
        live_segments_path = os.path.normpath(pil.broadcast_downloader.output_dir)
        if pil.segments_json_thread_worker and pil.segments_json_thread_worker.is_alive():
            pil.segments_json_thread_worker.join()
        if pil.comment_thread_worker and pil.comment_thread_worker.is_alive():
            logger.info("Waiting for comment downloader to finish.")
            pil.comment_thread_worker.join()
        logger.info('Merging downloaded files into video.')
        try:
            pil.broadcast_downloader.stitch(live_mp4_file, cleartempfiles=pil.clear_temp_files)
            logger.info('Successfully merged downloaded files into video.')
            if pil.clear_temp_files:
                helpers.remove_temp_folder()
            helpers.remove_lock()
        except ValueError as e:
            logger.separator()
            logger.error('Could not merge downloaded files: {:s}'.format(str(e)))
            if os.listdir(live_segments_path):
                logger.separator()
                logger.binfo("Segment directory is not empty. Trying to merge again.")
                logger.separator()
                pil.assemble_arg = live_mp4_file.replace(".mp4", "_downloads.json")
                assembler.assemble(user_called=False)
            else:
                logger.separator()
                logger.error("Segment directory is empty. There is nothing to merge.")
                logger.separator()
            helpers.remove_lock()
        except Exception as e:
            logger.error('Could not merge downloaded files: {:s}'.format(str(e)))
            helpers.remove_lock()
    except KeyboardInterrupt:
        logger.binfo('Aborted merging process, no video was created.')
        helpers.remove_lock()
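
# Set up the MPD downloader for the current livestream and run it until the
# broadcast ends or the user aborts, optionally launching the start command and
# the comment downloader thread, then hand off to merge_segments().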
def download_livestream():
    try:
        def print_status(sep=True):
            if pil.do_heartbeat:
                heartbeat_info = pil.ig_api.broadcast_heartbeat_and_viewercount(pil.livestream_obj.get('id'))
                if pil.verbose:
                    logger.plain(json.dumps(heartbeat_info))
            viewers = pil.livestream_obj.get('viewer_count', 0)
            if sep:
                logger.separator()
            else:
                logger.info('Username    : {:s}'.format(pil.dl_user))
            logger.info('Viewers     : {:s} watching'.format(str(int(viewers))))
            logger.info('Airing time : {:s}'.format(get_stream_duration(0)))
            if pil.do_heartbeat:
                logger.info('Status      : {:s}'.format(heartbeat_info.get('broadcast_status').title()))
                return heartbeat_info.get('broadcast_status') not in ['active', 'interrupted']
            else:
                return None

        mpd_url = (pil.livestream_obj.get('dash_manifest')
                   or pil.livestream_obj.get('dash_abr_playback_url')
                   or pil.livestream_obj.get('dash_playback_url'))
        pil.live_folder_path = '{}{}_{}_{}_{}_live_downloads'.format(pil.dl_path, pil.datetime_compat, pil.dl_user,
                                                                     pil.livestream_obj.get('id'), pil.epochtime)
        pil.broadcast_downloader = live.Downloader(
            mpd=mpd_url,
            output_dir=pil.live_folder_path,
            user_agent=pil.ig_api.user_agent,
            max_connection_error_retry=3,
            duplicate_etag_retry=30,
            callback_check=print_status,
            mpd_download_timeout=3,
            download_timeout=3,
            ffmpeg_binary=pil.ffmpeg_path)
    except Exception as e:
        logger.error('Could not start downloading livestream: {:s}'.format(str(e)))
        logger.separator()
        helpers.remove_lock()
    try:
        broadcast_owner = pil.livestream_obj.get('broadcast_owner', {}).get('username')
        try:
            broadcast_guest = pil.livestream_obj.get('cobroadcasters', {})[0].get('username')
        except Exception:
            broadcast_guest = None
        if broadcast_owner != pil.dl_user:
            logger.binfo('This livestream is a dual-live, the owner is "{}".'.format(broadcast_owner))
            broadcast_guest = None
        if broadcast_guest:
            logger.binfo('This livestream is a dual-live, the current guest is "{}".'.format(broadcast_guest))
            pil.has_guest = broadcast_guest
        logger.separator()
        print_status(False)
        logger.separator()
        helpers.create_lock_folder()
        pil.segments_json_thread_worker = threading.Thread(target=helpers.generate_json_segments)
        pil.segments_json_thread_worker.start()
        logger.info('Downloading livestream, press [CTRL+C] to abort.')
        if pil.run_at_start:
            try:
                thread = threading.Thread(target=helpers.run_command, args=(pil.run_at_start,))
                thread.daemon = True
                thread.start()
                logger.binfo("Launched start command: {:s}".format(pil.run_at_start))
            except Exception as e:
                logger.warn('Could not launch command: {:s}'.format(str(e)))
        if pil.dl_comments:
            try:
                comments_json_file = '{}{}_{}_{}_{}_live_comments.json'.format(
                    pil.dl_path, pil.datetime_compat, pil.dl_user, pil.livestream_obj.get('id'), pil.epochtime)
                pil.comment_thread_worker = threading.Thread(target=get_live_comments, args=(comments_json_file,))
                pil.comment_thread_worker.start()
            except Exception as e:
                logger.error('An error occurred while downloading comments: {:s}'.format(str(e)))
        pil.broadcast_downloader.run()
        logger.separator()
        logger.info("The livestream has been ended by the user.")
        logger.separator()
        logger.info('Airtime duration  : {}'.format(get_stream_duration(0)))
        logger.info('Download duration : {}'.format(get_stream_duration(1)))
        logger.info('Missing (approx.) : {}'.format(get_stream_duration(2)))
        logger.separator()
        merge_segments()
    except KeyboardInterrupt:
        logger.separator()
        logger.binfo('The download has been aborted.')
        logger.separator()
        logger.info('Airtime duration  : {}'.format(get_stream_duration(0)))
        logger.info('Download duration : {}'.format(get_stream_duration(1)))
        logger.info('Missing (approx.) : {}'.format(get_stream_duration(2)))
        logger.separator()
        if not pil.broadcast_downloader.is_aborted:
            pil.broadcast_downloader.stop()
        merge_segments()
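
# Download every replay that is not already present in the download directory,
# together with its comments when enabled, reporting each replay's duration as
# parsed from its DASH manifest.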
def download_replays():
    try:
        try:
            logger.info('Amount of replays : {:s}'.format(str(len(pil.replays_obj))))
            for replay_index, replay_obj in enumerate(pil.replays_obj):
                bc_dash_manifest = parseString(replay_obj.get('dash_manifest')).getElementsByTagName('Period')
                bc_duration_raw = bc_dash_manifest[0].getAttribute("duration")
                bc_minutes = (bc_duration_raw.split("H"))[1].split("M")[0]
                bc_seconds = ((bc_duration_raw.split("M"))[1].split("S")[0]).split('.')[0]
                logger.info(
                    'Replay {:s} duration : {:s} minutes and {:s} seconds'.format(str(replay_index + 1), bc_minutes,
                                                                                  bc_seconds))
        except Exception as e:
            logger.warn("An error occurred while getting replay duration information: {:s}".format(str(e)))
        logger.separator()
        logger.info("Downloading replays, press [CTRL+C] to abort.")
        logger.separator()
        for replay_index, replay_obj in enumerate(pil.replays_obj):
            exists = False
            pil.livestream_obj = replay_obj
            if Constants.PYTHON_VER[0][0] == '2':
                directories = (os.walk(pil.dl_path).next()[1])
            else:
                directories = (os.walk(pil.dl_path).__next__()[1])
            if pil.verbose:
                logger.separator()
                logger.plain("Listing contents of the folder '{}':".format(pil.dl_path))
                for directory in directories:
                    logger.plain(directory)
                logger.separator()
                logger.separator()
            for directory in directories:
                if (str(replay_obj.get('id')) in directory) and ("_live_" not in directory):
                    logger.binfo("Already downloaded a replay with ID '{:s}'.".format(str(replay_obj.get('id'))))
                    exists = True
            if not exists:
                current = replay_index + 1
                logger.info(
                    "Downloading replay {:s} of {:s} with ID '{:s}'.".format(str(current), str(len(pil.replays_obj)),
                                                                             str(replay_obj.get('id'))))
                pil.live_folder_path = '{}{}_{}_{}_{}_replay_downloads'.format(
                    pil.dl_path, pil.datetime_compat, pil.dl_user, pil.livestream_obj.get('id'), pil.epochtime)
                broadcast_downloader = replay.Downloader(
                    mpd=replay_obj.get('dash_manifest'),
                    output_dir=pil.live_folder_path,
                    user_agent=pil.ig_api.user_agent,
                    ffmpeg_binary=pil.ffmpeg_path)
                if pil.use_locks:
                    helpers.create_lock_folder()
                replay_mp4_file = '{}{}_{}_{}_{}_replay.mp4'.format(
                    pil.dl_path, pil.datetime_compat, pil.dl_user, pil.livestream_obj.get('id'), pil.epochtime)
                comments_json_file = '{}{}_{}_{}_{}_replay_comments.json'.format(
                    pil.dl_path, pil.datetime_compat, pil.dl_user, pil.livestream_obj.get('id'), pil.epochtime)
                pil.comment_thread_worker = threading.Thread(target=get_replay_comments, args=(comments_json_file,))
                broadcast_downloader.download(replay_mp4_file, cleartempfiles=pil.clear_temp_files)
                if pil.clear_temp_files:
                    helpers.remove_temp_folder()
                if pil.dl_comments:
                    logger.info("Downloading replay comments.")
                    try:
                        get_replay_comments(comments_json_file)
                    except Exception as e:
                        logger.error('An error occurred while downloading comments: {:s}'.format(str(e)))
                logger.info("Finished downloading replay {:s} of {:s}.".format(str(current), str(len(pil.replays_obj))))
                helpers.remove_lock()
                if current != len(pil.replays_obj):
                    logger.separator()
        logger.separator()
        logger.info("Finished downloading all available replays.")
        helpers.remove_lock()
    except Exception as e:
        logger.error('Could not save replay: {:s}'.format(str(e)))
        helpers.remove_lock()
    except KeyboardInterrupt:
        logger.separator()
        logger.binfo('The download has been aborted by the user, exiting.')
        helpers.remove_temp_folder()
        helpers.remove_lock()
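
# Query the reels tray to find which followed users currently have livestreams
# or replays available, then launch a separate download process for each via
# iterate_users().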
def download_following():
    try:
        is_checking = ''
        if pil.dl_lives and pil.dl_replays:
            is_checking = 'livestreams or replays'
        elif pil.dl_lives and not pil.dl_replays:
            is_checking = 'livestreams'
        elif not pil.dl_lives and pil.dl_replays:
            is_checking = 'replays'
        logger.info("Checking following users for any {:s}.".format(is_checking))
        broadcast_f_list = pil.ig_api.reels_tray()
        if pil.verbose:
            logger.plain(json.dumps(broadcast_f_list))
        usernames_available_livestreams = []
        usernames_available_replays = []
        if broadcast_f_list['broadcasts'] and pil.dl_lives:
            for broadcast_f in broadcast_f_list['broadcasts']:
                username = broadcast_f['broadcast_owner']['username']
                if username not in usernames_available_livestreams:
                    usernames_available_livestreams.append(username)
        if broadcast_f_list.get('post_live', {}).get('post_live_items', []) and pil.dl_replays:
            for broadcast_r in broadcast_f_list.get('post_live', {}).get('post_live_items', []):
                for broadcast_f in broadcast_r.get("broadcasts", []):
                    username = broadcast_f['broadcast_owner']['username']
                    if username not in usernames_available_replays:
                        usernames_available_replays.append(username)
        logger.separator()
        available_total = list(usernames_available_livestreams)
        available_total.extend(x for x in usernames_available_replays if x not in available_total)
        if available_total:
            logger.info("The following users have available {:s}.".format(is_checking))
            logger.info(', '.join(available_total))
            logger.separator()
            iterate_users(available_total)
        else:
            logger.info("There are currently no available {:s}.".format(is_checking))
            logger.separator()
    except Exception as e:
        logger.error("Could not finish checking following users: {:s}".format(str(e)))
    except KeyboardInterrupt:
        logger.separator()
        logger.binfo('The checking process has been aborted by the user.')
        logger.separator()
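
# Launch a separate pyinstalive daemon process for each user in user_list,
# skipping users that already have a lock file in the download directory.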
def iterate_users(user_list):
    for user in user_list:
        try:
            if os.path.isfile(os.path.join(pil.dl_path, user + '.lock')):
                logger.warn("Lock file is already present for '{:s}', there is probably another download "
                            "ongoing!".format(user))
                logger.warn(
                    "If this is not the case, manually delete the file '{:s}' and try again.".format(user + '.lock'))
            else:
                logger.info("Launching daemon process for '{:s}'.".format(user))
                start_result = helpers.run_command("pyinstalive -d {:s} -cp '{:s}' -dp '{:s}' {:s} {:s} {:s}".format(
                    user,
                    pil.config_path,
                    pil.dl_path,
                    '--no-lives' if not pil.dl_lives else '',
                    '--no-replays' if not pil.dl_replays else '',
                    '--no-heartbeat' if not pil.do_heartbeat else ''))
                if start_result:
                    logger.warn("Could not start process: {:s}".format(str(start_result)))
                else:
                    logger.info("Process started successfully.")
            logger.separator()
            time.sleep(2)
        except Exception as e:
            logger.warn("Could not start process: {:s}".format(str(e)))
        except KeyboardInterrupt:
            logger.binfo('The process launching has been aborted by the user.')
            logger.separator()
            break
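
# Poll for new comments while the livestream download is running, then write
# them to a JSON file and generate a human-readable log. The JSON file is
# removed once the log has been written.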
def get_live_comments(comments_json_file):
    try:
        comments_downloader = CommentsDownloader(destination_file=comments_json_file)
        first_comment_created_at = 0
        try:
            while not pil.broadcast_downloader.is_aborted:
                if 'initial_buffered_duration' not in pil.livestream_obj and pil.broadcast_downloader.initial_buffered_duration:
                    pil.livestream_obj['initial_buffered_duration'] = pil.broadcast_downloader.initial_buffered_duration
                    comments_downloader.broadcast = pil.livestream_obj
                first_comment_created_at = comments_downloader.get_live(first_comment_created_at)
        except ClientError as e:
            if 'media has been deleted' not in e.error_response:
                logger.warn("Comment collection ClientError: %d %s" % (e.code, e.error_response))
        try:
            if comments_downloader.comments:
                comments_downloader.save()
                comments_log_file = comments_json_file.replace('.json', '.log')
                comment_errors, total_comments = CommentsDownloader.generate_log(
                    comments_downloader.comments, pil.epochtime, comments_log_file,
                    comments_delay=pil.broadcast_downloader.initial_buffered_duration)
                if len(comments_downloader.comments) == 1:
                    logger.info("Successfully saved 1 comment.")
                    os.remove(comments_json_file)
                    logger.separator()
                    return True
                else:
                    if comment_errors:
                        logger.warn(
                            "Successfully saved {:s} comments but {:s} comments are (partially) missing.".format(
                                str(total_comments), str(comment_errors)))
                    else:
                        logger.info("Successfully saved {:s} comments.".format(str(total_comments)))
                    os.remove(comments_json_file)
                    logger.separator()
                    return True
            else:
                logger.info("There are no available comments to save.")
                logger.separator()
                return False
        except Exception as e:
            logger.error('Could not save comments: {:s}'.format(str(e)))
            return False
    except KeyboardInterrupt as e:
        logger.binfo("Downloading livestream comments has been aborted.")
        return False
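
# Download all comments for a finished replay and write a human-readable log
# next to the JSON file; the JSON file is removed once the log exists.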
def get_replay_comments(comments_json_file):
    try:
        comments_downloader = CommentsDownloader(destination_file=comments_json_file)
        comments_downloader.get_replay()
        try:
            if comments_downloader.comments:
                comments_log_file = comments_json_file.replace('.json', '.log')
                comment_errors, total_comments = CommentsDownloader.generate_log(
                    comments_downloader.comments, pil.livestream_obj.get('published_time'), comments_log_file,
                    comments_delay=0)
                if total_comments == 1:
                    logger.info("Successfully saved 1 comment to logfile.")
                    os.remove(comments_json_file)
                    logger.separator()
                    return True
                else:
                    if comment_errors:
                        logger.warn(
                            "Successfully saved {:s} comments but {:s} comments are (partially) missing.".format(
                                str(total_comments), str(comment_errors)))
                    else:
                        logger.info("Successfully saved {:s} comments.".format(str(total_comments)))
                    os.remove(comments_json_file)
                    logger.separator()
                    return True
            else:
                logger.info("There are no available comments to save.")
                return False
        except Exception as e:
            logger.error('Could not save comments to logfile: {:s}'.format(str(e)))
            return False
    except KeyboardInterrupt as e:
        logger.binfo("Downloading replay comments has been aborted.")
        return False