dlfuncs.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497
  1. import os
  2. import shutil
  3. import json
  4. import threading
  5. import time
  6. from xml.dom.minidom import parseString
  7. from instagram_private_api import ClientConnectionError
  8. from instagram_private_api import ClientError
  9. from instagram_private_api import ClientThrottledError
  10. from instagram_private_api_extensions import live
  11. from instagram_private_api_extensions import replay
  12. try:
  13. import logger
  14. import helpers
  15. import pil
  16. import dlfuncs
  17. import assembler
  18. from constants import Constants
  19. from comments import CommentsDownloader
  20. except ImportError:
  21. from . import logger
  22. from . import helpers
  23. from . import pil
  24. from . import assembler
  25. from . import dlfuncs
  26. from .constants import Constants
  27. from .comments import CommentsDownloader
  28. def get_stream_duration(duration_type):
  29. try:
  30. # For some reason the published_time is roughly 40 seconds behind real world time
  31. if duration_type == 0: # Airtime duration
  32. stream_started_mins, stream_started_secs = divmod((int(time.time()) - pil.livestream_obj.get("published_time") + 40), 60)
  33. if duration_type == 1: # Download duration
  34. stream_started_mins, stream_started_secs = divmod((int(time.time()) - int(pil.epochtime)), 60)
  35. if duration_type == 2: # Missing duration
  36. if (int(pil.epochtime) - pil.livestream_obj.get("published_time") + 40) <= 0:
  37. stream_started_mins, stream_started_secs = 0, 0 # Download started 'earlier' than actual broadcast, assume started at the same time instead
  38. else:
  39. stream_started_mins, stream_started_secs = divmod((int(pil.epochtime) - pil.livestream_obj.get("published_time") + 40), 60)
  40. if stream_started_mins < 0:
  41. stream_started_mins = 0
  42. if stream_started_secs < 0:
  43. stream_started_secs = 0
  44. stream_duration_str = '%d minutes' % stream_started_mins
  45. if stream_started_secs:
  46. stream_duration_str += ' and %d seconds' % stream_started_secs
  47. return stream_duration_str
  48. except Exception as e:
  49. return "Not available"
  50. def get_user_id():
  51. is_user_id = False
  52. user_id = None
  53. try:
  54. user_id = int(pil.dl_user)
  55. is_user_id = True
  56. except ValueError:
  57. try:
  58. user_res = pil.ig_api.username_info(pil.dl_user)
  59. user_id = user_res.get('user', {}).get('pk')
  60. except ClientConnectionError as cce:
  61. logger.error(
  62. "Could not get user info for '{:s}': {:d} {:s}".format(pil.dl_user, cce.code, str(cce)))
  63. if "getaddrinfo failed" in str(cce):
  64. logger.error('Could not resolve host, check your internet connection.')
  65. if "timed out" in str(cce):
  66. logger.error('The connection timed out, check your internet connection.')
  67. except ClientThrottledError as cte:
  68. logger.error(
  69. "Could not get user info for '{:s}': {:d} {:s}".format(pil.dl_user, cte.code, str(cte)))
  70. except ClientError as ce:
  71. logger.error(
  72. "Could not get user info for '{:s}': {:d} {:s}".format(pil.dl_user, ce.code, str(ce)))
  73. if "Not Found" in str(ce):
  74. logger.error('The specified user does not exist.')
  75. except Exception as e:
  76. logger.error("Could not get user info for '{:s}': {:s}".format(pil.dl_user, str(e)))
  77. except KeyboardInterrupt:
  78. logger.binfo("Aborted getting user info for '{:s}', exiting.".format(pil.dl_user))
  79. if user_id and is_user_id:
  80. logger.info("Getting info for '{:s}' successful. Assuming input is an user Id.".format(pil.dl_user))
  81. logger.separator()
  82. return user_id
  83. elif user_id:
  84. logger.info("Getting info for '{:s}' successful.".format(pil.dl_user))
  85. logger.separator()
  86. return user_id
  87. else:
  88. return None
  89. def get_broadcasts_info():
  90. try:
  91. user_id = get_user_id()
  92. if user_id:
  93. broadcasts = pil.ig_api.user_story_feed(user_id)
  94. pil.livestream_obj = broadcasts.get('broadcast')
  95. pil.replays_obj = broadcasts.get('post_live_item', {}).get('broadcasts', [])
  96. return True
  97. else:
  98. return False
  99. except Exception as e:
  100. logger.error('Could not finish checking: {:s}'.format(str(e)))
  101. if "timed out" in str(e):
  102. logger.error('The connection timed out, check your internet connection.')
  103. logger.separator()
  104. return False
  105. except KeyboardInterrupt:
  106. logger.binfo('Aborted checking for livestreams and replays, exiting.'.format(pil.dl_user))
  107. logger.separator()
  108. return False
  109. except ClientThrottledError as cte:
  110. logger.error('Could not check because you are making too many requests at this time.')
  111. logger.separator()
  112. return False
  113. def merge_segments():
  114. try:
  115. if pil.run_at_finish:
  116. try:
  117. thread = threading.Thread(target=helpers.run_command, args=(pil.run_at_finish,))
  118. thread.daemon = True
  119. thread.start()
  120. logger.binfo("Launched finish command: {:s}".format(pil.run_at_finish))
  121. except Exception as e:
  122. logger.warn('Could not execute command: {:s}'.format(str(e)))
  123. live_mp4_file = '{}{}_{}_{}_{}_live.mp4'.format(pil.dl_path, pil.datetime_compat, pil.dl_user,
  124. pil.livestream_obj.get('id'), pil.epochtime)
  125. live_segments_path = os.path.normpath(pil.broadcast_downloader.output_dir)
  126. if pil.segments_json_thread_worker and pil.segments_json_thread_worker.is_alive():
  127. pil.segments_json_thread_worker.join()
  128. if pil.comment_thread_worker and pil.comment_thread_worker.is_alive():
  129. logger.info("Waiting for comment downloader to finish.")
  130. pil.comment_thread_worker.join()
  131. logger.info('Merging downloaded files into video.')
  132. try:
  133. pil.broadcast_downloader.stitch(live_mp4_file, cleartempfiles=pil.clear_temp_files)
  134. logger.info('Successfully merged downloaded files into video.')
  135. if pil.clear_temp_files:
  136. helpers.remove_temp_folder()
  137. helpers.remove_lock()
  138. except ValueError as e:
  139. logger.separator()
  140. logger.error('Could not merge downloaded files: {:s}'.format(str(e)))
  141. if os.listdir(live_segments_path):
  142. logger.separator()
  143. logger.binfo("Segment directory is not empty. Trying to merge again.")
  144. logger.separator()
  145. pil.assemble_arg = live_mp4_file.replace(".mp4", "_downloads.json")
  146. assembler.assemble(user_called=False)
  147. else:
  148. logger.separator()
  149. logger.error("Segment directory is empty. There is nothing to merge.")
  150. logger.separator()
  151. helpers.remove_lock()
  152. except Exception as e:
  153. logger.error('Could not merge downloaded files: {:s}'.format(str(e)))
  154. helpers.remove_lock()
  155. except KeyboardInterrupt:
  156. logger.binfo('Aborted merging process, no video was created.')
  157. helpers.remove_lock()
  158. def download_livestream():
  159. try:
  160. def print_status(sep=True):
  161. heartbeat_info = pil.ig_api.broadcast_heartbeat_and_viewercount(pil.livestream_obj.get('id'))
  162. viewers = pil.livestream_obj.get('viewer_count', 0)
  163. if sep:
  164. logger.separator()
  165. else:
  166. logger.info('Username : {:s}'.format(pil.dl_user))
  167. logger.info('Viewers : {:s} watching'.format(str(int(viewers))))
  168. logger.info('Airing time : {:s}'.format(get_stream_duration(0)))
  169. logger.info('Status : {:s}'.format(heartbeat_info.get('broadcast_status').title()))
  170. return heartbeat_info.get('broadcast_status') not in ['active', 'interrupted']
  171. mpd_url = (pil.livestream_obj.get('dash_manifest')
  172. or pil.livestream_obj.get('dash_abr_playback_url')
  173. or pil.livestream_obj.get('dash_playback_url'))
  174. pil.live_folder_path = '{}{}_{}_{}_{}_live_downloads'.format(pil.dl_path, pil.datetime_compat, pil.dl_user,
  175. pil.livestream_obj.get('id'), pil.epochtime)
  176. pil.broadcast_downloader = live.Downloader(
  177. mpd=mpd_url,
  178. output_dir=pil.live_folder_path,
  179. user_agent=pil.ig_api.user_agent,
  180. max_connection_error_retry=3,
  181. duplicate_etag_retry=30,
  182. callback_check=print_status,
  183. mpd_download_timeout=3,
  184. download_timeout=3,
  185. ffmpeg_binary=pil.ffmpeg_path)
  186. except Exception as e:
  187. logger.error('Could not start downloading livestream: {:s}'.format(str(e)))
  188. logger.separator()
  189. helpers.remove_lock()
  190. try:
  191. broadcast_owner = pil.livestream_obj.get('broadcast_owner', {}).get('username')
  192. try:
  193. broadcast_guest = pil.livestream_obj.get('cobroadcasters', {})[0].get('username')
  194. except Exception:
  195. broadcast_guest = None
  196. if broadcast_owner != pil.dl_user:
  197. logger.binfo('This livestream is a dual-live, the owner is "{}".'.format(broadcast_owner))
  198. broadcast_guest = None
  199. if broadcast_guest:
  200. logger.binfo('This livestream is a dual-live, the current guest is "{}".'.format(broadcast_guest))
  201. pil.has_guest = broadcast_guest
  202. logger.separator()
  203. print_status(False)
  204. logger.separator()
  205. helpers.create_lock_folder()
  206. pil.segments_json_thread_worker = threading.Thread(target=helpers.generate_json_segments)
  207. pil.segments_json_thread_worker.start()
  208. logger.info('Downloading livestream, press [CTRL+C] to abort.')
  209. if pil.run_at_start:
  210. try:
  211. thread = threading.Thread(target=helpers.run_command, args=(pil.run_at_start,))
  212. thread.daemon = True
  213. thread.start()
  214. logger.binfo("Launched start command: {:s}".format(pil.run_at_start))
  215. except Exception as e:
  216. logger.warn('Could not launch command: {:s}'.format(str(e)))
  217. if pil.dl_comments:
  218. try:
  219. comments_json_file = '{}{}_{}_{}_{}_live_comments.json'.format(
  220. pil.dl_path, pil.datetime_compat, pil.dl_user, pil.livestream_obj.get('id'), pil.epochtime)
  221. pil.comment_thread_worker = threading.Thread(target=get_live_comments, args=(comments_json_file,))
  222. pil.comment_thread_worker.start()
  223. except Exception as e:
  224. logger.error('An error occurred while downloading comments: {:s}'.format(str(e)))
  225. pil.broadcast_downloader.run()
  226. logger.separator()
  227. logger.info("The livestream has been ended by the user.")
  228. logger.separator()
  229. logger.info('Airtime duration : {}'.format(get_stream_duration(0)))
  230. logger.info('Download duration : {}'.format(get_stream_duration(1)))
  231. logger.info('Missing (approx.) : {}'.format(get_stream_duration(2)))
  232. logger.separator()
  233. merge_segments()
  234. except KeyboardInterrupt:
  235. logger.separator()
  236. logger.binfo('The download has been aborted.')
  237. logger.separator()
  238. logger.info('Airtime duration : {}'.format(get_stream_duration(0)))
  239. logger.info('Download duration : {}'.format(get_stream_duration(1)))
  240. logger.info('Missing (approx.) : {}'.format(get_stream_duration(2)))
  241. logger.separator()
  242. if not pil.broadcast_downloader.is_aborted:
  243. pil.broadcast_downloader.stop()
  244. merge_segments()
  245. def download_replays():
  246. try:
  247. try:
  248. logger.info('Amount of replays : {:s}'.format(str(len(pil.replays_obj))))
  249. for replay_index, replay_obj in enumerate(pil.replays_obj):
  250. bc_dash_manifest = parseString(replay_obj.get('dash_manifest')).getElementsByTagName('Period')
  251. bc_duration_raw = bc_dash_manifest[0].getAttribute("duration")
  252. bc_minutes = (bc_duration_raw.split("H"))[1].split("M")[0]
  253. bc_seconds = ((bc_duration_raw.split("M"))[1].split("S")[0]).split('.')[0]
  254. logger.info(
  255. 'Replay {:s} duration : {:s} minutes and {:s} seconds'.format(str(replay_index + 1), bc_minutes,
  256. bc_seconds))
  257. except Exception as e:
  258. logger.warn("An error occurred while getting replay duration information: {:s}".format(str(e)))
  259. logger.separator()
  260. logger.info("Downloading replays, press [CTRL+C] to abort.")
  261. logger.separator()
  262. for replay_index, replay_obj in enumerate(pil.replays_obj):
  263. exists = False
  264. pil.livestream_obj = replay_obj
  265. if Constants.PYTHON_VER[0][0] == '2':
  266. directories = (os.walk(pil.dl_path).next()[1])
  267. else:
  268. directories = (os.walk(pil.dl_path).__next__()[1])
  269. for directory in directories:
  270. if (str(replay_obj.get('id')) in directory) and ("_live_" not in directory):
  271. logger.binfo("Already downloaded a replay with ID '{:s}'.".format(str(replay_obj.get('id'))))
  272. exists = True
  273. if not exists:
  274. current = replay_index + 1
  275. logger.info(
  276. "Downloading replay {:s} of {:s} with ID '{:s}'.".format(str(current), str(len(pil.replays_obj)),
  277. str(replay_obj.get('id'))))
  278. pil.live_folder_path = '{}{}_{}_{}_{}_replay_downloads'.format(
  279. pil.dl_path, pil.datetime_compat, pil.dl_user, pil.livestream_obj.get('id'), pil.epochtime)
  280. broadcast_downloader = replay.Downloader(
  281. mpd=replay_obj.get('dash_manifest'),
  282. output_dir=pil.live_folder_path,
  283. user_agent=pil.ig_api.user_agent,
  284. ffmpeg_binary=pil.ffmpeg_path)
  285. if pil.use_locks:
  286. helpers.create_lock_folder()
  287. replay_mp4_file = '{}{}_{}_{}_{}_replay.mp4'.format(
  288. pil.dl_path, pil.datetime_compat, pil.dl_user, pil.livestream_obj.get('id'), pil.epochtime)
  289. comments_json_file = '{}{}_{}_{}_{}_replay_comments.json'.format(
  290. pil.dl_path, pil.datetime_compat, pil.dl_user, pil.livestream_obj.get('id'), pil.epochtime)
  291. pil.comment_thread_worker = threading.Thread(target=get_replay_comments, args=(comments_json_file,))
  292. broadcast_downloader.download(replay_mp4_file, cleartempfiles=pil.clear_temp_files)
  293. if pil.clear_temp_files:
  294. helpers.remove_temp_folder()
  295. if pil.dl_comments:
  296. logger.info("Downloading replay comments.")
  297. try:
  298. get_replay_comments(comments_json_file)
  299. except Exception as e:
  300. logger.error('An error occurred while downloading comments: {:s}'.format(str(e)))
  301. logger.info("Finished downloading replay {:s} of {:s}.".format(str(current), str(len(pil.replays_obj))))
  302. helpers.remove_lock()
  303. if current != len(pil.replays_obj):
  304. logger.separator()
  305. logger.separator()
  306. logger.info("Finished downloading all available replays.")
  307. helpers.remove_lock()
  308. except Exception as e:
  309. logger.error('Could not save replay: {:s}'.format(str(e)))
  310. helpers.remove_lock()
  311. except KeyboardInterrupt:
  312. logger.separator()
  313. logger.binfo('The download has been aborted by the user, exiting.')
  314. logger.separator()
  315. helpers.remove_temp_folder()
  316. helpers.remove_lock()
  317. def download_following():
  318. try:
  319. is_checking = ''
  320. if pil.dl_lives and pil.dl_replays:
  321. is_checking = 'livestreams or replays'
  322. elif pil.dl_lives and not pil.dl_replays:
  323. is_checking = 'livestreams'
  324. elif not pil.dl_lives and pil.dl_replays:
  325. is_checking = 'replays'
  326. logger.info("Checking following users for any {:s}.".format(is_checking))
  327. broadcast_f_list = pil.ig_api.reels_tray()
  328. usernames_available_livestreams = []
  329. usernames_available_replays = []
  330. if broadcast_f_list['broadcasts'] and pil.dl_lives:
  331. for broadcast_f in broadcast_f_list['broadcasts']:
  332. username = broadcast_f['broadcast_owner']['username']
  333. if username not in usernames_available_livestreams:
  334. usernames_available_livestreams.append(username)
  335. if broadcast_f_list.get('post_live', {}).get('post_live_items', []) and pil.dl_replays:
  336. for broadcast_r in broadcast_f_list.get('post_live', {}).get('post_live_items', []):
  337. for broadcast_f in broadcast_r.get("broadcasts", []):
  338. username = broadcast_f['broadcast_owner']['username']
  339. if username not in usernames_available_replays:
  340. usernames_available_replays.append(username)
  341. logger.separator()
  342. available_total = list(usernames_available_livestreams)
  343. available_total.extend(x for x in usernames_available_replays if x not in available_total)
  344. if available_total:
  345. logger.info("The following users have available {:s}.".format(is_checking))
  346. logger.info(', '.join(available_total))
  347. logger.separator()
  348. for user in available_total:
  349. try:
  350. if os.path.isfile(os.path.join(pil.dl_path, user + '.lock')):
  351. logger.warn("Lock file is already present for '{:s}', there is probably another download "
  352. "ongoing!".format(user))
  353. logger.warn("If this is not the case, manually delete the file '{:s}' and try again.".format(user + '.lock'))
  354. else:
  355. logger.info("Launching daemon process for '{:s}'.".format(user))
  356. start_result = helpers.run_command("pyinstalive -d {:s} -cp '{:s}' -dp '{:s}' {:s} {:s}".format(
  357. user, pil.config_path, pil.dl_path,
  358. '--no-lives' if not pil.dl_lives else '', '--no-replays' if not pil.dl_replays else ''))
  359. if start_result:
  360. logger.warn("Could not start process: {:s}".format(str(start_result)))
  361. else:
  362. logger.info("Process started successfully.")
  363. logger.separator()
  364. time.sleep(2)
  365. except Exception as e:
  366. logger.warn("Could not start processs: {:s}".format(str(e)))
  367. except KeyboardInterrupt:
  368. logger.binfo('The process launching has been aborted by the user.')
  369. logger.separator()
  370. else:
  371. logger.info("There are currently no available {:s}.".format(is_checking))
  372. logger.separator()
  373. except Exception as e:
  374. logger.error("Could not finish checking following users: {:s}".format(str(e)))
  375. except KeyboardInterrupt:
  376. logger.separator()
  377. logger.binfo('The checking process has been aborted by the user.')
  378. logger.separator()
  379. def get_live_comments(comments_json_file):
  380. try:
  381. comments_downloader = CommentsDownloader(destination_file=comments_json_file)
  382. first_comment_created_at = 0
  383. try:
  384. while not pil.broadcast_downloader.is_aborted:
  385. if 'initial_buffered_duration' not in pil.livestream_obj and pil.broadcast_downloader.initial_buffered_duration:
  386. pil.livestream_obj['initial_buffered_duration'] = pil.broadcast_downloader.initial_buffered_duration
  387. comments_downloader.broadcast = pil.livestream_obj
  388. first_comment_created_at = comments_downloader.get_live(first_comment_created_at)
  389. except ClientError as e:
  390. if not 'media has been deleted' in e.error_response:
  391. logger.warn("Comment collection ClientError: %d %s" % (e.code, e.error_response))
  392. try:
  393. if comments_downloader.comments:
  394. comments_downloader.save()
  395. comments_log_file = comments_json_file.replace('.json', '.log')
  396. comment_errors, total_comments = CommentsDownloader.generate_log(
  397. comments_downloader.comments, pil.epochtime, comments_log_file,
  398. comments_delay=pil.broadcast_downloader.initial_buffered_duration)
  399. if len(comments_downloader.comments) == 1:
  400. logger.info("Successfully saved 1 comment.")
  401. os.remove(comments_json_file)
  402. logger.separator()
  403. return True
  404. else:
  405. if comment_errors:
  406. logger.warn(
  407. "Successfully saved {:s} comments but {:s} comments are (partially) missing.".format(
  408. str(total_comments), str(comment_errors)))
  409. else:
  410. logger.info("Successfully saved {:s} comments.".format(str(total_comments)))
  411. os.remove(comments_json_file)
  412. logger.separator()
  413. return True
  414. else:
  415. logger.info("There are no available comments to save.")
  416. logger.separator()
  417. return False
  418. except Exception as e:
  419. logger.error('Could not save comments: {:s}'.format(str(e)))
  420. return False
  421. except KeyboardInterrupt as e:
  422. logger.binfo("Downloading livestream comments has been aborted.")
  423. return False
  424. def get_replay_comments(comments_json_file):
  425. try:
  426. comments_downloader = CommentsDownloader(destination_file=comments_json_file)
  427. comments_downloader.get_replay()
  428. try:
  429. if comments_downloader.comments:
  430. comments_log_file = comments_json_file.replace('.json', '.log')
  431. comment_errors, total_comments = CommentsDownloader.generate_log(
  432. comments_downloader.comments, pil.livestream_obj.get('published_time'), comments_log_file,
  433. comments_delay=0)
  434. if total_comments == 1:
  435. logger.info("Successfully saved 1 comment to logfile.")
  436. os.remove(comments_json_file)
  437. logger.separator()
  438. return True
  439. else:
  440. if comment_errors:
  441. logger.warn(
  442. "Successfully saved {:s} comments but {:s} comments are (partially) missing.".format(
  443. str(total_comments), str(comment_errors)))
  444. else:
  445. logger.info("Successfully saved {:s} comments.".format(str(total_comments)))
  446. os.remove(comments_json_file)
  447. logger.separator()
  448. return True
  449. else:
  450. logger.info("There are no available comments to save.")
  451. return False
  452. except Exception as e:
  453. logger.error('Could not save comments to logfile: {:s}'.format(str(e)))
  454. return False
  455. except KeyboardInterrupt as e:
  456. logger.binfo("Downloading replay comments has been aborted.")
  457. return False