dlfuncs.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515
  1. import os
  2. import shutil
  3. import json
  4. import threading
  5. import time
  6. from xml.dom.minidom import parseString
  7. from instagram_private_api import ClientConnectionError
  8. from instagram_private_api import ClientError
  9. from instagram_private_api import ClientThrottledError
  10. from instagram_private_api_extensions import live
  11. from instagram_private_api_extensions import replay
  12. try:
  13. import logger
  14. import helpers
  15. import pil
  16. import dlfuncs
  17. import assembler
  18. from constants import Constants
  19. from comments import CommentsDownloader
  20. except ImportError:
  21. from . import logger
  22. from . import helpers
  23. from . import pil
  24. from . import assembler
  25. from . import dlfuncs
  26. from .constants import Constants
  27. from .comments import CommentsDownloader
  28. def get_stream_duration(duration_type):
  29. try:
  30. # For some reason the published_time is roughly 40 seconds behind real world time
  31. if duration_type == 0: # Airtime duration
  32. stream_started_mins, stream_started_secs = divmod((int(time.time()) - pil.livestream_obj.get("published_time")), 60)
  33. if duration_type == 1: # Download duration
  34. stream_started_mins, stream_started_secs = divmod((int(time.time()) - int(pil.epochtime)), 60)
  35. if duration_type == 2: # Missing duration
  36. if (int(pil.epochtime) - pil.livestream_obj.get("published_time")) <= 0:
  37. stream_started_mins, stream_started_secs = 0, 0 # Download started 'earlier' than actual broadcast, assume started at the same time instead
  38. else:
  39. stream_started_mins, stream_started_secs = divmod((int(pil.epochtime) - pil.livestream_obj.get("published_time")), 60)
  40. if stream_started_mins < 0:
  41. stream_started_mins = 0
  42. if stream_started_secs < 0:
  43. stream_started_secs = 0
  44. stream_duration_str = '%d minutes' % stream_started_mins
  45. if stream_started_secs:
  46. stream_duration_str += ' and %d seconds' % stream_started_secs
  47. return stream_duration_str
  48. except Exception:
  49. return "Not available"
  50. def get_user_id():
  51. is_user_id = False
  52. user_id = None
  53. try:
  54. user_id = int(pil.dl_user)
  55. is_user_id = True
  56. except ValueError:
  57. try:
  58. user_res = pil.ig_api.username_info(pil.dl_user)
  59. user_id = user_res.get('user', {}).get('pk')
  60. except ClientConnectionError as cce:
  61. logger.error(
  62. "Could not get user info for '{:s}': {:d} {:s}".format(pil.dl_user, cce.code, str(cce)))
  63. if "getaddrinfo failed" in str(cce):
  64. logger.error('Could not resolve host, check your internet connection.')
  65. if "timed out" in str(cce):
  66. logger.error('The connection timed out, check your internet connection.')
  67. except ClientThrottledError as cte:
  68. logger.error(
  69. "Could not get user info for '{:s}': {:d} {:s}".format(pil.dl_user, cte.code, str(cte)))
  70. except ClientError as ce:
  71. logger.error(
  72. "Could not get user info for '{:s}': {:d} {:s}".format(pil.dl_user, ce.code, str(ce)))
  73. if "Not Found" in str(ce):
  74. logger.error('The specified user does not exist.')
  75. except Exception as e:
  76. logger.error("Could not get user info for '{:s}': {:s}".format(pil.dl_user, str(e)))
  77. except KeyboardInterrupt:
  78. logger.binfo("Aborted getting user info for '{:s}', exiting.".format(pil.dl_user))
  79. if user_id and is_user_id:
  80. logger.info("Getting info for '{:s}' successful. Assuming input is an user Id.".format(pil.dl_user))
  81. logger.separator()
  82. return user_id
  83. elif user_id:
  84. logger.info("Getting info for '{:s}' successful.".format(pil.dl_user))
  85. logger.separator()
  86. return user_id
  87. else:
  88. return None
  89. def get_broadcasts_info():
  90. try:
  91. user_id = get_user_id()
  92. if user_id:
  93. broadcasts = pil.ig_api.user_story_feed(user_id)
  94. pil.livestream_obj = broadcasts.get('broadcast')
  95. pil.replays_obj = broadcasts.get('post_live_item', {}).get('broadcasts', [])
  96. return True
  97. else:
  98. return False
  99. except ClientThrottledError:
  100. logger.error('Could not check because you are making too many requests at this time.')
  101. return False
  102. except Exception as e:
  103. logger.error('Could not finish checking: {:s}'.format(str(e)))
  104. if "timed out" in str(e):
  105. logger.error('The connection timed out, check your internet connection.')
  106. if "login_required" in str(e):
  107. logger.error('Login cookie was loaded but user is not actually logged in. Delete the cookie file and try '
  108. 'again.')
  109. return False
  110. except KeyboardInterrupt:
  111. logger.binfo('Aborted checking for livestreams and replays, exiting.')
  112. return False
  113. def merge_segments():
  114. try:
  115. if pil.run_at_finish:
  116. try:
  117. thread = threading.Thread(target=helpers.run_command, args=(pil.run_at_finish,))
  118. thread.daemon = True
  119. thread.start()
  120. logger.binfo("Launched finish command: {:s}".format(pil.run_at_finish))
  121. except Exception as e:
  122. logger.warn('Could not execute command: {:s}'.format(str(e)))
  123. live_mp4_file = '{}{}_{}_{}_{}_live.mp4'.format(pil.dl_path, pil.datetime_compat, pil.dl_user,
  124. pil.livestream_obj.get('id'), pil.epochtime)
  125. live_segments_path = os.path.normpath(pil.broadcast_downloader.output_dir)
  126. if pil.segments_json_thread_worker and pil.segments_json_thread_worker.is_alive():
  127. pil.kill_segment_thread = True
  128. pil.segments_json_thread_worker.join()
  129. if pil.comment_thread_worker and pil.comment_thread_worker.is_alive():
  130. logger.info("Waiting for comment downloader to finish.")
  131. pil.comment_thread_worker.join()
  132. try:
  133. if not pil.skip_merge:
  134. logger.info('Merging downloaded files into video.')
  135. pil.broadcast_downloader.stitch(live_mp4_file, cleartempfiles=pil.clear_temp_files)
  136. logger.info('Successfully merged downloaded files into video.')
  137. if pil.clear_temp_files:
  138. helpers.remove_temp_folder()
  139. else:
  140. logger.binfo("Merging of downloaded files has been disabled.")
  141. logger.binfo("Use --assemble command to manually merge downloaded segments.")
  142. helpers.remove_lock()
  143. except ValueError as e:
  144. logger.separator()
  145. logger.error('Could not merge downloaded files: {:s}'.format(str(e)))
  146. if os.listdir(live_segments_path):
  147. logger.separator()
  148. logger.binfo("Segment directory is not empty. Trying to merge again.")
  149. logger.separator()
  150. pil.assemble_arg = live_mp4_file.replace(".mp4", "_downloads.json")
  151. assembler.assemble(user_called=False)
  152. else:
  153. logger.separator()
  154. logger.error("Segment directory is empty. There is nothing to merge.")
  155. logger.separator()
  156. helpers.remove_lock()
  157. except Exception as e:
  158. logger.error('Could not merge downloaded files: {:s}'.format(str(e)))
  159. helpers.remove_lock()
  160. except KeyboardInterrupt:
  161. logger.binfo('Aborted merging process, no video was created.')
  162. helpers.remove_lock()
  163. def download_livestream():
  164. try:
  165. def print_status(sep=True):
  166. if pil.do_heartbeat:
  167. heartbeat_info = pil.ig_api.broadcast_heartbeat_and_viewercount(pil.livestream_obj.get('id'))
  168. viewers = pil.livestream_obj.get('viewer_count', 0) + 1
  169. if sep:
  170. logger.separator()
  171. else:
  172. logger.info('Username : {:s}'.format(pil.dl_user))
  173. logger.info('Viewers : {:s} watching'.format(str(int(viewers))))
  174. logger.info('Airing time : {:s}'.format(get_stream_duration(0)))
  175. if pil.do_heartbeat:
  176. logger.info('Status : {:s}'.format(heartbeat_info.get('broadcast_status').title()))
  177. return heartbeat_info.get('broadcast_status') not in ['active', 'interrupted']
  178. else:
  179. return None
  180. mpd_url = (pil.livestream_obj.get('dash_manifest')
  181. or pil.livestream_obj.get('dash_abr_playback_url')
  182. or pil.livestream_obj.get('dash_playback_url'))
  183. pil.live_folder_path = '{}{}_{}_{}_{}_live_downloads'.format(pil.dl_path, pil.datetime_compat, pil.dl_user,
  184. pil.livestream_obj.get('id'), pil.epochtime)
  185. pil.broadcast_downloader = live.Downloader(
  186. mpd=mpd_url,
  187. output_dir=pil.live_folder_path,
  188. user_agent=pil.ig_api.user_agent,
  189. max_connection_error_retry=3,
  190. duplicate_etag_retry=30,
  191. callback_check=print_status,
  192. mpd_download_timeout=3,
  193. download_timeout=3,
  194. ffmpeg_binary=pil.ffmpeg_path)
  195. except Exception as e:
  196. logger.error('Could not start downloading livestream: {:s}'.format(str(e)))
  197. logger.separator()
  198. helpers.remove_lock()
  199. try:
  200. broadcast_owner = pil.livestream_obj.get('broadcast_owner', {}).get('username')
  201. try:
  202. broadcast_guest = pil.livestream_obj.get('cobroadcasters', {})[0].get('username')
  203. except Exception:
  204. broadcast_guest = None
  205. if broadcast_owner != pil.dl_user:
  206. logger.binfo('This livestream is a dual-live, the owner is "{}".'.format(broadcast_owner))
  207. broadcast_guest = None
  208. if broadcast_guest:
  209. logger.binfo('This livestream is a dual-live, the current guest is "{}".'.format(broadcast_guest))
  210. pil.has_guest = broadcast_guest
  211. logger.separator()
  212. print_status(False)
  213. logger.separator()
  214. helpers.create_lock_folder()
  215. pil.segments_json_thread_worker = threading.Thread(target=helpers.generate_json_segments)
  216. pil.segments_json_thread_worker.start()
  217. logger.info('Downloading livestream, press [CTRL+C] to abort.')
  218. if pil.run_at_start:
  219. try:
  220. thread = threading.Thread(target=helpers.run_command, args=(pil.run_at_start,))
  221. thread.daemon = True
  222. thread.start()
  223. logger.binfo("Launched start command: {:s}".format(pil.run_at_start))
  224. except Exception as e:
  225. logger.warn('Could not launch command: {:s}'.format(str(e)))
  226. if pil.dl_comments:
  227. try:
  228. comments_json_file = '{}{}_{}_{}_{}_live_comments.json'.format(
  229. pil.dl_path, pil.datetime_compat, pil.dl_user, pil.livestream_obj.get('id'), pil.epochtime)
  230. pil.comment_thread_worker = threading.Thread(target=get_live_comments, args=(comments_json_file,))
  231. pil.comment_thread_worker.start()
  232. except Exception as e:
  233. logger.error('An error occurred while downloading comments: {:s}'.format(str(e)))
  234. pil.broadcast_downloader.run()
  235. logger.separator()
  236. logger.info("The livestream has been ended by the user.")
  237. logger.separator()
  238. logger.info('Airtime duration : {}'.format(get_stream_duration(0)))
  239. logger.info('Download duration : {}'.format(get_stream_duration(1)))
  240. logger.info('Missing (approx.) : {}'.format(get_stream_duration(2)))
  241. logger.separator()
  242. merge_segments()
  243. except KeyboardInterrupt:
  244. logger.separator()
  245. logger.binfo('The download has been aborted.')
  246. logger.separator()
  247. logger.info('Airtime duration : {}'.format(get_stream_duration(0)))
  248. logger.info('Download duration : {}'.format(get_stream_duration(1)))
  249. logger.info('Missing (approx.) : {}'.format(get_stream_duration(2)))
  250. logger.separator()
  251. if not pil.broadcast_downloader.is_aborted:
  252. pil.broadcast_downloader.stop()
  253. merge_segments()
  254. def download_replays():
  255. try:
  256. try:
  257. logger.info('Amount of replays : {:s}'.format(str(len(pil.replays_obj))))
  258. for replay_index, replay_obj in enumerate(pil.replays_obj):
  259. bc_dash_manifest = parseString(replay_obj.get('dash_manifest')).getElementsByTagName('Period')
  260. bc_duration_raw = bc_dash_manifest[0].getAttribute("duration")
  261. bc_minutes = (bc_duration_raw.split("H"))[1].split("M")[0]
  262. bc_seconds = ((bc_duration_raw.split("M"))[1].split("S")[0]).split('.')[0]
  263. logger.info(
  264. 'Replay {:s} duration : {:s} minutes and {:s} seconds'.format(str(replay_index + 1), bc_minutes,
  265. bc_seconds))
  266. except Exception as e:
  267. logger.warn("An error occurred while getting replay duration information: {:s}".format(str(e)))
  268. logger.separator()
  269. logger.info("Downloading replays, press [CTRL+C] to abort.")
  270. logger.separator()
  271. for replay_index, replay_obj in enumerate(pil.replays_obj):
  272. exists = False
  273. pil.livestream_obj = replay_obj
  274. dl_path_files = os.listdir(pil.dl_path)
  275. for dl_path_file in dl_path_files:
  276. if (str(replay_obj.get('id')) in dl_path_file) and ("_replay" in dl_path_file) and (dl_path_file.endswith(".mp4")):
  277. logger.binfo("Already downloaded replay {:d} with ID '{:s}'.".format(replay_index + 1, str(replay_obj.get('id'))))
  278. exists = True
  279. if not exists:
  280. current = replay_index + 1
  281. logger.info(
  282. "Downloading replay {:s} of {:s} with ID '{:s}'.".format(str(current), str(len(pil.replays_obj)),
  283. str(replay_obj.get('id'))))
  284. pil.live_folder_path = '{}{}_{}_{}_{}_replay_downloads'.format(
  285. pil.dl_path, pil.datetime_compat, pil.dl_user, pil.livestream_obj.get('id'), replay_obj.get("published_time"))
  286. broadcast_downloader = replay.Downloader(
  287. mpd=replay_obj.get('dash_manifest'),
  288. output_dir=pil.live_folder_path,
  289. user_agent=pil.ig_api.user_agent,
  290. ffmpeg_binary=pil.ffmpeg_path)
  291. if pil.use_locks:
  292. helpers.create_lock_folder()
  293. replay_mp4_file = '{}{}_{}_{}_{}_replay.mp4'.format(
  294. pil.dl_path, pil.datetime_compat, pil.dl_user, pil.livestream_obj.get('id'), replay_obj.get("published_time"))
  295. comments_json_file = '{}{}_{}_{}_{}_replay_comments.json'.format(
  296. pil.dl_path, pil.datetime_compat, pil.dl_user, pil.livestream_obj.get('id'), replay_obj.get("published_time"))
  297. pil.comment_thread_worker = threading.Thread(target=get_replay_comments, args=(comments_json_file,))
  298. broadcast_downloader.download(replay_mp4_file, cleartempfiles=pil.clear_temp_files)
  299. if pil.clear_temp_files:
  300. helpers.remove_temp_folder()
  301. if pil.dl_comments:
  302. logger.info("Downloading replay comments.")
  303. try:
  304. get_replay_comments(comments_json_file)
  305. except Exception as e:
  306. logger.error('An error occurred while downloading comments: {:s}'.format(str(e)))
  307. logger.info("Finished downloading replay {:s} of {:s}.".format(str(current), str(len(pil.replays_obj))))
  308. helpers.remove_lock()
  309. if current != len(pil.replays_obj):
  310. logger.separator()
  311. logger.separator()
  312. logger.info("Finished downloading all available replays.")
  313. helpers.remove_lock()
  314. except Exception as e:
  315. logger.error('Could not save replay: {:s}'.format(str(e)))
  316. helpers.remove_lock()
  317. except KeyboardInterrupt:
  318. logger.separator()
  319. logger.binfo('The download has been aborted by the user, exiting.')
  320. helpers.remove_temp_folder()
  321. helpers.remove_lock()
  322. def download_following():
  323. try:
  324. is_checking = ''
  325. if pil.dl_lives and pil.dl_replays:
  326. is_checking = 'livestreams or replays'
  327. elif pil.dl_lives and not pil.dl_replays:
  328. is_checking = 'livestreams'
  329. elif not pil.dl_lives and pil.dl_replays:
  330. is_checking = 'replays'
  331. logger.info("Checking following users for any {:s}.".format(is_checking))
  332. broadcast_f_list = pil.ig_api.reels_tray()
  333. usernames_available_livestreams = []
  334. usernames_available_replays = []
  335. if broadcast_f_list['broadcasts'] and pil.dl_lives:
  336. for broadcast_f in broadcast_f_list['broadcasts']:
  337. username = broadcast_f['broadcast_owner']['username']
  338. if username not in usernames_available_livestreams:
  339. usernames_available_livestreams.append(username)
  340. if broadcast_f_list.get('post_live', {}).get('post_live_items', []) and pil.dl_replays:
  341. for broadcast_r in broadcast_f_list.get('post_live', {}).get('post_live_items', []):
  342. for broadcast_f in broadcast_r.get("broadcasts", []):
  343. username = broadcast_f['broadcast_owner']['username']
  344. if username not in usernames_available_replays:
  345. usernames_available_replays.append(username)
  346. logger.separator()
  347. available_total = list(usernames_available_livestreams)
  348. available_total.extend(x for x in usernames_available_replays if x not in available_total)
  349. if available_total:
  350. logger.info("The following users have available {:s}.".format(is_checking))
  351. logger.info(', '.join(available_total))
  352. logger.separator()
  353. iterate_users(available_total)
  354. else:
  355. logger.info("There are currently no available {:s}.".format(is_checking))
  356. logger.separator()
  357. except Exception as e:
  358. logger.error("Could not finish checking following users: {:s}".format(str(e)))
  359. except KeyboardInterrupt:
  360. logger.separator()
  361. logger.binfo('The checking process has been aborted by the user.')
  362. logger.separator()
  363. def iterate_users(user_list):
  364. for user in user_list:
  365. try:
  366. if os.path.isfile(os.path.join(pil.dl_path, user + '.lock')):
  367. logger.warn("Lock file is already present for '{:s}', there is probably another download "
  368. "ongoing!".format(user))
  369. logger.warn(
  370. "If this is not the case, manually delete the file '{:s}' and try again.".format(user + '.lock'))
  371. else:
  372. logger.info("Launching daemon process for '{:s}'.".format(user))
  373. start_result = helpers.run_command("{:s} -d {:s} -cp '{:s}' -dp '{:s}' {:s} {:s} {:s} {:s}".format(
  374. ("'" + pil.winbuild_path + "'") if pil.winbuild_path else "pyinstalive",
  375. user,
  376. pil.config_path,
  377. pil.dl_path,
  378. '--no-lives' if not pil.dl_lives else '',
  379. '--no-replays' if not pil.dl_replays else '',
  380. '--no-heartbeat' if not pil.do_heartbeat else '',
  381. '--username {:s} --password {:s}'.format(pil.ig_user, pil.ig_pass) if pil.config_login_overridden else ''))
  382. if start_result:
  383. logger.warn("Could not start process: {:s}".format(str(start_result)))
  384. else:
  385. logger.info("Process started successfully.")
  386. logger.separator()
  387. time.sleep(2)
  388. except Exception as e:
  389. logger.warn("Could not start process: {:s}".format(str(e)))
  390. except KeyboardInterrupt:
  391. logger.binfo('The process launching has been aborted by the user.')
  392. logger.separator()
  393. break
  394. def get_live_comments(comments_json_file):
  395. try:
  396. comments_downloader = CommentsDownloader(destination_file=comments_json_file)
  397. first_comment_created_at = 0
  398. try:
  399. while not pil.broadcast_downloader.is_aborted:
  400. if 'initial_buffered_duration' not in pil.livestream_obj and pil.broadcast_downloader.initial_buffered_duration:
  401. pil.livestream_obj['initial_buffered_duration'] = pil.broadcast_downloader.initial_buffered_duration
  402. comments_downloader.broadcast = pil.livestream_obj
  403. first_comment_created_at = comments_downloader.get_live(first_comment_created_at)
  404. except ClientError as e:
  405. if not 'media has been deleted' in e.error_response:
  406. logger.warn("Comment collection ClientError: %d %s" % (e.code, e.error_response))
  407. try:
  408. if comments_downloader.comments:
  409. comments_downloader.save()
  410. comments_log_file = comments_json_file.replace('.json', '.log')
  411. comment_errors, total_comments = CommentsDownloader.generate_log(
  412. comments_downloader.comments, pil.epochtime, comments_log_file,
  413. comments_delay=pil.broadcast_downloader.initial_buffered_duration)
  414. if len(comments_downloader.comments) == 1:
  415. logger.info("Successfully saved 1 comment.")
  416. #os.remove(comments_json_file)
  417. logger.separator()
  418. return True
  419. else:
  420. if comment_errors:
  421. logger.warn(
  422. "Successfully saved {:s} comments but {:s} comments are (partially) missing.".format(
  423. str(total_comments), str(comment_errors)))
  424. else:
  425. logger.info("Successfully saved {:s} comments.".format(str(total_comments)))
  426. #os.remove(comments_json_file)
  427. logger.separator()
  428. return True
  429. else:
  430. logger.info("There are no available comments to save.")
  431. logger.separator()
  432. return False
  433. except Exception as e:
  434. logger.error('Could not save comments: {:s}'.format(str(e)))
  435. return False
  436. except KeyboardInterrupt as e:
  437. logger.binfo("Downloading livestream comments has been aborted.")
  438. return False
  439. def get_replay_comments(comments_json_file):
  440. try:
  441. comments_downloader = CommentsDownloader(destination_file=comments_json_file)
  442. comments_downloader.get_replay()
  443. try:
  444. if comments_downloader.comments:
  445. comments_log_file = comments_json_file.replace('.json', '.log')
  446. comment_errors, total_comments = CommentsDownloader.generate_log(
  447. comments_downloader.comments, pil.livestream_obj.get('published_time'), comments_log_file,
  448. comments_delay=0)
  449. if total_comments == 1:
  450. logger.info("Successfully saved 1 comment to logfile.")
  451. #os.remove(comments_json_file)
  452. logger.separator()
  453. return True
  454. else:
  455. if comment_errors:
  456. logger.warn(
  457. "Successfully saved {:s} comments but {:s} comments are (partially) missing.".format(
  458. str(total_comments), str(comment_errors)))
  459. else:
  460. logger.info("Successfully saved {:s} comments.".format(str(total_comments)))
  461. #os.remove(comments_json_file)
  462. logger.separator()
  463. return True
  464. else:
  465. logger.info("There are no available comments to save.")
  466. return False
  467. except Exception as e:
  468. logger.error('Could not save comments to logfile: {:s}'.format(str(e)))
  469. return False
  470. except KeyboardInterrupt as e:
  471. logger.binfo("Downloading replay comments has been aborted.")
  472. return False