dlfuncs.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511
  1. import os
  2. import shutil
  3. import json
  4. import threading
  5. import time
  6. from xml.dom.minidom import parseString
  7. from instagram_private_api import ClientConnectionError
  8. from instagram_private_api import ClientError
  9. from instagram_private_api import ClientThrottledError
  10. from instagram_private_api_extensions import live
  11. from instagram_private_api_extensions import replay
  12. try:
  13. import logger
  14. import helpers
  15. import pil
  16. import dlfuncs
  17. import assembler
  18. from constants import Constants
  19. from comments import CommentsDownloader
  20. except ImportError:
  21. from . import logger
  22. from . import helpers
  23. from . import pil
  24. from . import assembler
  25. from . import dlfuncs
  26. from .constants import Constants
  27. from .comments import CommentsDownloader
  28. def get_stream_duration(duration_type):
  29. try:
  30. # For some reason the published_time is roughly 40 seconds behind real world time
  31. if duration_type == 0: # Airtime duration
  32. stream_started_mins, stream_started_secs = divmod((int(time.time()) - pil.livestream_obj.get("published_time") + 40), 60)
  33. if duration_type == 1: # Download duration
  34. stream_started_mins, stream_started_secs = divmod((int(time.time()) - int(pil.epochtime)), 60)
  35. if duration_type == 2: # Missing duration
  36. if (int(pil.epochtime) - pil.livestream_obj.get("published_time") + 40) <= 0:
  37. stream_started_mins, stream_started_secs = 0, 0 # Download started 'earlier' than actual broadcast, assume started at the same time instead
  38. else:
  39. stream_started_mins, stream_started_secs = divmod((int(pil.epochtime) - pil.livestream_obj.get("published_time") + 40), 60)
  40. if stream_started_mins < 0:
  41. stream_started_mins = 0
  42. if stream_started_secs < 0:
  43. stream_started_secs = 0
  44. stream_duration_str = '%d minutes' % stream_started_mins
  45. if stream_started_secs:
  46. stream_duration_str += ' and %d seconds' % stream_started_secs
  47. return stream_duration_str
  48. except Exception as e:
  49. return "Not available"
  50. def get_user_id():
  51. is_user_id = False
  52. user_id = None
  53. try:
  54. user_id = int(pil.dl_user)
  55. is_user_id = True
  56. except ValueError:
  57. try:
  58. user_res = pil.ig_api.username_info(pil.dl_user)
  59. user_id = user_res.get('user', {}).get('pk')
  60. except ClientConnectionError as cce:
  61. logger.error(
  62. "Could not get user info for '{:s}': {:d} {:s}".format(pil.dl_user, cce.code, str(cce)))
  63. if "getaddrinfo failed" in str(cce):
  64. logger.error('Could not resolve host, check your internet connection.')
  65. if "timed out" in str(cce):
  66. logger.error('The connection timed out, check your internet connection.')
  67. except ClientThrottledError as cte:
  68. logger.error(
  69. "Could not get user info for '{:s}': {:d} {:s}".format(pil.dl_user, cte.code, str(cte)))
  70. except ClientError as ce:
  71. logger.error(
  72. "Could not get user info for '{:s}': {:d} {:s}".format(pil.dl_user, ce.code, str(ce)))
  73. if "Not Found" in str(ce):
  74. logger.error('The specified user does not exist.')
  75. except Exception as e:
  76. logger.error("Could not get user info for '{:s}': {:s}".format(pil.dl_user, str(e)))
  77. except KeyboardInterrupt:
  78. logger.binfo("Aborted getting user info for '{:s}', exiting.".format(pil.dl_user))
  79. if user_id and is_user_id:
  80. logger.info("Getting info for '{:s}' successful. Assuming input is an user Id.".format(pil.dl_user))
  81. logger.separator()
  82. return user_id
  83. elif user_id:
  84. logger.info("Getting info for '{:s}' successful.".format(pil.dl_user))
  85. logger.separator()
  86. return user_id
  87. else:
  88. return None
  89. def get_broadcasts_info():
  90. try:
  91. user_id = get_user_id()
  92. if user_id:
  93. broadcasts = pil.ig_api.user_story_feed(user_id)
  94. pil.livestream_obj = broadcasts.get('broadcast')
  95. pil.replays_obj = broadcasts.get('post_live_item', {}).get('broadcasts', [])
  96. return True
  97. else:
  98. return False
  99. except Exception as e:
  100. logger.error('Could not finish checking: {:s}'.format(str(e)))
  101. if "timed out" in str(e):
  102. logger.error('The connection timed out, check your internet connection.')
  103. if "login_required" in str(e):
  104. logger.error('Login cookie was loaded but user is not actually logged in. Delete the cookie file and try '
  105. 'again.')
  106. return False
  107. except KeyboardInterrupt:
  108. logger.binfo('Aborted checking for livestreams and replays, exiting.'.format(pil.dl_user))
  109. return False
  110. except ClientThrottledError as cte:
  111. logger.error('Could not check because you are making too many requests at this time.')
  112. return False
  113. def merge_segments():
  114. try:
  115. if pil.run_at_finish:
  116. try:
  117. thread = threading.Thread(target=helpers.run_command, args=(pil.run_at_finish,))
  118. thread.daemon = True
  119. thread.start()
  120. logger.binfo("Launched finish command: {:s}".format(pil.run_at_finish))
  121. except Exception as e:
  122. logger.warn('Could not execute command: {:s}'.format(str(e)))
  123. live_mp4_file = '{}{}_{}_{}_{}_live.mp4'.format(pil.dl_path, pil.datetime_compat, pil.dl_user,
  124. pil.livestream_obj.get('id'), pil.epochtime)
  125. live_segments_path = os.path.normpath(pil.broadcast_downloader.output_dir)
  126. if pil.segments_json_thread_worker and pil.segments_json_thread_worker.is_alive():
  127. pil.segments_json_thread_worker.join()
  128. if pil.comment_thread_worker and pil.comment_thread_worker.is_alive():
  129. logger.info("Waiting for comment downloader to finish.")
  130. pil.comment_thread_worker.join()
  131. logger.info('Merging downloaded files into video.')
  132. try:
  133. pil.broadcast_downloader.stitch(live_mp4_file, cleartempfiles=pil.clear_temp_files)
  134. logger.info('Successfully merged downloaded files into video.')
  135. if pil.clear_temp_files:
  136. helpers.remove_temp_folder()
  137. helpers.remove_lock()
  138. except ValueError as e:
  139. logger.separator()
  140. logger.error('Could not merge downloaded files: {:s}'.format(str(e)))
  141. if os.listdir(live_segments_path):
  142. logger.separator()
  143. logger.binfo("Segment directory is not empty. Trying to merge again.")
  144. logger.separator()
  145. pil.assemble_arg = live_mp4_file.replace(".mp4", "_downloads.json")
  146. assembler.assemble(user_called=False)
  147. else:
  148. logger.separator()
  149. logger.error("Segment directory is empty. There is nothing to merge.")
  150. logger.separator()
  151. helpers.remove_lock()
  152. except Exception as e:
  153. logger.error('Could not merge downloaded files: {:s}'.format(str(e)))
  154. helpers.remove_lock()
  155. except KeyboardInterrupt:
  156. logger.binfo('Aborted merging process, no video was created.')
  157. helpers.remove_lock()
  158. def download_livestream():
  159. try:
  160. def print_status(sep=True):
  161. if pil.do_heartbeat:
  162. heartbeat_info = pil.ig_api.broadcast_heartbeat_and_viewercount(pil.livestream_obj.get('id'))
  163. viewers = pil.livestream_obj.get('viewer_count', 0)
  164. if sep:
  165. logger.separator()
  166. else:
  167. logger.info('Username : {:s}'.format(pil.dl_user))
  168. logger.info('Viewers : {:s} watching'.format(str(int(viewers))))
  169. logger.info('Airing time : {:s}'.format(get_stream_duration(0)))
  170. if pil.do_heartbeat:
  171. logger.info('Status : {:s}'.format(heartbeat_info.get('broadcast_status').title()))
  172. return heartbeat_info.get('broadcast_status') not in ['active', 'interrupted']
  173. else:
  174. return None
  175. mpd_url = (pil.livestream_obj.get('dash_manifest')
  176. or pil.livestream_obj.get('dash_abr_playback_url')
  177. or pil.livestream_obj.get('dash_playback_url'))
  178. pil.live_folder_path = '{}{}_{}_{}_{}_live_downloads'.format(pil.dl_path, pil.datetime_compat, pil.dl_user,
  179. pil.livestream_obj.get('id'), pil.epochtime)
  180. pil.broadcast_downloader = live.Downloader(
  181. mpd=mpd_url,
  182. output_dir=pil.live_folder_path,
  183. user_agent=pil.ig_api.user_agent,
  184. max_connection_error_retry=3,
  185. duplicate_etag_retry=30,
  186. callback_check=print_status,
  187. mpd_download_timeout=3,
  188. download_timeout=3,
  189. ffmpeg_binary=pil.ffmpeg_path)
  190. except Exception as e:
  191. logger.error('Could not start downloading livestream: {:s}'.format(str(e)))
  192. logger.separator()
  193. helpers.remove_lock()
  194. try:
  195. broadcast_owner = pil.livestream_obj.get('broadcast_owner', {}).get('username')
  196. try:
  197. broadcast_guest = pil.livestream_obj.get('cobroadcasters', {})[0].get('username')
  198. except Exception:
  199. broadcast_guest = None
  200. if broadcast_owner != pil.dl_user:
  201. logger.binfo('This livestream is a dual-live, the owner is "{}".'.format(broadcast_owner))
  202. broadcast_guest = None
  203. if broadcast_guest:
  204. logger.binfo('This livestream is a dual-live, the current guest is "{}".'.format(broadcast_guest))
  205. pil.has_guest = broadcast_guest
  206. logger.separator()
  207. print_status(False)
  208. logger.separator()
  209. helpers.create_lock_folder()
  210. pil.segments_json_thread_worker = threading.Thread(target=helpers.generate_json_segments)
  211. pil.segments_json_thread_worker.start()
  212. logger.info('Downloading livestream, press [CTRL+C] to abort.')
  213. if pil.run_at_start:
  214. try:
  215. thread = threading.Thread(target=helpers.run_command, args=(pil.run_at_start,))
  216. thread.daemon = True
  217. thread.start()
  218. logger.binfo("Launched start command: {:s}".format(pil.run_at_start))
  219. except Exception as e:
  220. logger.warn('Could not launch command: {:s}'.format(str(e)))
  221. if pil.dl_comments:
  222. try:
  223. comments_json_file = '{}{}_{}_{}_{}_live_comments.json'.format(
  224. pil.dl_path, pil.datetime_compat, pil.dl_user, pil.livestream_obj.get('id'), pil.epochtime)
  225. pil.comment_thread_worker = threading.Thread(target=get_live_comments, args=(comments_json_file,))
  226. pil.comment_thread_worker.start()
  227. except Exception as e:
  228. logger.error('An error occurred while downloading comments: {:s}'.format(str(e)))
  229. pil.broadcast_downloader.run()
  230. logger.separator()
  231. logger.info("The livestream has been ended by the user.")
  232. logger.separator()
  233. logger.info('Airtime duration : {}'.format(get_stream_duration(0)))
  234. logger.info('Download duration : {}'.format(get_stream_duration(1)))
  235. logger.info('Missing (approx.) : {}'.format(get_stream_duration(2)))
  236. logger.separator()
  237. merge_segments()
  238. except KeyboardInterrupt:
  239. logger.separator()
  240. logger.binfo('The download has been aborted.')
  241. logger.separator()
  242. logger.info('Airtime duration : {}'.format(get_stream_duration(0)))
  243. logger.info('Download duration : {}'.format(get_stream_duration(1)))
  244. logger.info('Missing (approx.) : {}'.format(get_stream_duration(2)))
  245. logger.separator()
  246. if not pil.broadcast_downloader.is_aborted:
  247. pil.broadcast_downloader.stop()
  248. merge_segments()
  249. def download_replays():
  250. try:
  251. try:
  252. logger.info('Amount of replays : {:s}'.format(str(len(pil.replays_obj))))
  253. for replay_index, replay_obj in enumerate(pil.replays_obj):
  254. bc_dash_manifest = parseString(replay_obj.get('dash_manifest')).getElementsByTagName('Period')
  255. bc_duration_raw = bc_dash_manifest[0].getAttribute("duration")
  256. bc_minutes = (bc_duration_raw.split("H"))[1].split("M")[0]
  257. bc_seconds = ((bc_duration_raw.split("M"))[1].split("S")[0]).split('.')[0]
  258. logger.info(
  259. 'Replay {:s} duration : {:s} minutes and {:s} seconds'.format(str(replay_index + 1), bc_minutes,
  260. bc_seconds))
  261. except Exception as e:
  262. logger.warn("An error occurred while getting replay duration information: {:s}".format(str(e)))
  263. logger.separator()
  264. logger.info("Downloading replays, press [CTRL+C] to abort.")
  265. logger.separator()
  266. for replay_index, replay_obj in enumerate(pil.replays_obj):
  267. exists = False
  268. pil.livestream_obj = replay_obj
  269. if Constants.PYTHON_VER[0][0] == '2':
  270. directories = (os.walk(pil.dl_path).next()[1])
  271. else:
  272. directories = (os.walk(pil.dl_path).__next__()[1])
  273. for directory in directories:
  274. if (str(replay_obj.get('id')) in directory) and ("_live_" not in directory):
  275. logger.binfo("Already downloaded a replay with ID '{:s}'.".format(str(replay_obj.get('id'))))
  276. exists = True
  277. if not exists:
  278. current = replay_index + 1
  279. logger.info(
  280. "Downloading replay {:s} of {:s} with ID '{:s}'.".format(str(current), str(len(pil.replays_obj)),
  281. str(replay_obj.get('id'))))
  282. pil.live_folder_path = '{}{}_{}_{}_{}_replay_downloads'.format(
  283. pil.dl_path, pil.datetime_compat, pil.dl_user, pil.livestream_obj.get('id'), pil.epochtime)
  284. broadcast_downloader = replay.Downloader(
  285. mpd=replay_obj.get('dash_manifest'),
  286. output_dir=pil.live_folder_path,
  287. user_agent=pil.ig_api.user_agent,
  288. ffmpeg_binary=pil.ffmpeg_path)
  289. if pil.use_locks:
  290. helpers.create_lock_folder()
  291. replay_mp4_file = '{}{}_{}_{}_{}_replay.mp4'.format(
  292. pil.dl_path, pil.datetime_compat, pil.dl_user, pil.livestream_obj.get('id'), pil.epochtime)
  293. comments_json_file = '{}{}_{}_{}_{}_replay_comments.json'.format(
  294. pil.dl_path, pil.datetime_compat, pil.dl_user, pil.livestream_obj.get('id'), pil.epochtime)
  295. pil.comment_thread_worker = threading.Thread(target=get_replay_comments, args=(comments_json_file,))
  296. broadcast_downloader.download(replay_mp4_file, cleartempfiles=pil.clear_temp_files)
  297. if pil.clear_temp_files:
  298. helpers.remove_temp_folder()
  299. if pil.dl_comments:
  300. logger.info("Downloading replay comments.")
  301. try:
  302. get_replay_comments(comments_json_file)
  303. except Exception as e:
  304. logger.error('An error occurred while downloading comments: {:s}'.format(str(e)))
  305. logger.info("Finished downloading replay {:s} of {:s}.".format(str(current), str(len(pil.replays_obj))))
  306. helpers.remove_lock()
  307. if current != len(pil.replays_obj):
  308. logger.separator()
  309. logger.separator()
  310. logger.info("Finished downloading all available replays.")
  311. helpers.remove_lock()
  312. except Exception as e:
  313. logger.error('Could not save replay: {:s}'.format(str(e)))
  314. helpers.remove_lock()
  315. except KeyboardInterrupt:
  316. logger.separator()
  317. logger.binfo('The download has been aborted by the user, exiting.')
  318. logger.separator()
  319. helpers.remove_temp_folder()
  320. helpers.remove_lock()
  321. def download_following():
  322. try:
  323. is_checking = ''
  324. if pil.dl_lives and pil.dl_replays:
  325. is_checking = 'livestreams or replays'
  326. elif pil.dl_lives and not pil.dl_replays:
  327. is_checking = 'livestreams'
  328. elif not pil.dl_lives and pil.dl_replays:
  329. is_checking = 'replays'
  330. logger.info("Checking following users for any {:s}.".format(is_checking))
  331. broadcast_f_list = pil.ig_api.reels_tray()
  332. usernames_available_livestreams = []
  333. usernames_available_replays = []
  334. if broadcast_f_list['broadcasts'] and pil.dl_lives:
  335. for broadcast_f in broadcast_f_list['broadcasts']:
  336. username = broadcast_f['broadcast_owner']['username']
  337. if username not in usernames_available_livestreams:
  338. usernames_available_livestreams.append(username)
  339. if broadcast_f_list.get('post_live', {}).get('post_live_items', []) and pil.dl_replays:
  340. for broadcast_r in broadcast_f_list.get('post_live', {}).get('post_live_items', []):
  341. for broadcast_f in broadcast_r.get("broadcasts", []):
  342. username = broadcast_f['broadcast_owner']['username']
  343. if username not in usernames_available_replays:
  344. usernames_available_replays.append(username)
  345. logger.separator()
  346. available_total = list(usernames_available_livestreams)
  347. available_total.extend(x for x in usernames_available_replays if x not in available_total)
  348. if available_total:
  349. logger.info("The following users have available {:s}.".format(is_checking))
  350. logger.info(', '.join(available_total))
  351. logger.separator()
  352. iterate_users(available_total)
  353. else:
  354. logger.info("There are currently no available {:s}.".format(is_checking))
  355. logger.separator()
  356. except Exception as e:
  357. logger.error("Could not finish checking following users: {:s}".format(str(e)))
  358. except KeyboardInterrupt:
  359. logger.separator()
  360. logger.binfo('The checking process has been aborted by the user.')
  361. logger.separator()
  362. def iterate_users(user_list):
  363. for user in user_list:
  364. try:
  365. if os.path.isfile(os.path.join(pil.dl_path, user + '.lock')):
  366. logger.warn("Lock file is already present for '{:s}', there is probably another download "
  367. "ongoing!".format(user))
  368. logger.warn(
  369. "If this is not the case, manually delete the file '{:s}' and try again.".format(user + '.lock'))
  370. else:
  371. logger.info("Launching daemon process for '{:s}'.".format(user))
  372. start_result = helpers.run_command("pyinstalive -d {:s} -cp '{:s}' -dp '{:s}' {:s} {:s} {:s}".format(
  373. user,
  374. pil.config_path,
  375. pil.dl_path,
  376. '--no-lives' if not pil.dl_lives else '',
  377. '--no-replays' if not pil.dl_replays else '',
  378. '--no-heartbeat' if not pil.do_heartbeat else ''))
  379. if start_result:
  380. logger.warn("Could not start process: {:s}".format(str(start_result)))
  381. else:
  382. logger.info("Process started successfully.")
  383. logger.separator()
  384. time.sleep(2)
  385. except Exception as e:
  386. logger.warn("Could not start process: {:s}".format(str(e)))
  387. except KeyboardInterrupt:
  388. logger.binfo('The process launching has been aborted by the user.')
  389. logger.separator()
  390. break
  391. def get_live_comments(comments_json_file):
  392. try:
  393. comments_downloader = CommentsDownloader(destination_file=comments_json_file)
  394. first_comment_created_at = 0
  395. try:
  396. while not pil.broadcast_downloader.is_aborted:
  397. if 'initial_buffered_duration' not in pil.livestream_obj and pil.broadcast_downloader.initial_buffered_duration:
  398. pil.livestream_obj['initial_buffered_duration'] = pil.broadcast_downloader.initial_buffered_duration
  399. comments_downloader.broadcast = pil.livestream_obj
  400. first_comment_created_at = comments_downloader.get_live(first_comment_created_at)
  401. except ClientError as e:
  402. if not 'media has been deleted' in e.error_response:
  403. logger.warn("Comment collection ClientError: %d %s" % (e.code, e.error_response))
  404. try:
  405. if comments_downloader.comments:
  406. comments_downloader.save()
  407. comments_log_file = comments_json_file.replace('.json', '.log')
  408. comment_errors, total_comments = CommentsDownloader.generate_log(
  409. comments_downloader.comments, pil.epochtime, comments_log_file,
  410. comments_delay=pil.broadcast_downloader.initial_buffered_duration)
  411. if len(comments_downloader.comments) == 1:
  412. logger.info("Successfully saved 1 comment.")
  413. os.remove(comments_json_file)
  414. logger.separator()
  415. return True
  416. else:
  417. if comment_errors:
  418. logger.warn(
  419. "Successfully saved {:s} comments but {:s} comments are (partially) missing.".format(
  420. str(total_comments), str(comment_errors)))
  421. else:
  422. logger.info("Successfully saved {:s} comments.".format(str(total_comments)))
  423. os.remove(comments_json_file)
  424. logger.separator()
  425. return True
  426. else:
  427. logger.info("There are no available comments to save.")
  428. logger.separator()
  429. return False
  430. except Exception as e:
  431. logger.error('Could not save comments: {:s}'.format(str(e)))
  432. return False
  433. except KeyboardInterrupt as e:
  434. logger.binfo("Downloading livestream comments has been aborted.")
  435. return False
  436. def get_replay_comments(comments_json_file):
  437. try:
  438. comments_downloader = CommentsDownloader(destination_file=comments_json_file)
  439. comments_downloader.get_replay()
  440. try:
  441. if comments_downloader.comments:
  442. comments_log_file = comments_json_file.replace('.json', '.log')
  443. comment_errors, total_comments = CommentsDownloader.generate_log(
  444. comments_downloader.comments, pil.livestream_obj.get('published_time'), comments_log_file,
  445. comments_delay=0)
  446. if total_comments == 1:
  447. logger.info("Successfully saved 1 comment to logfile.")
  448. os.remove(comments_json_file)
  449. logger.separator()
  450. return True
  451. else:
  452. if comment_errors:
  453. logger.warn(
  454. "Successfully saved {:s} comments but {:s} comments are (partially) missing.".format(
  455. str(total_comments), str(comment_errors)))
  456. else:
  457. logger.info("Successfully saved {:s} comments.".format(str(total_comments)))
  458. os.remove(comments_json_file)
  459. logger.separator()
  460. return True
  461. else:
  462. logger.info("There are no available comments to save.")
  463. return False
  464. except Exception as e:
  465. logger.error('Could not save comments to logfile: {:s}'.format(str(e)))
  466. return False
  467. except KeyboardInterrupt as e:
  468. logger.binfo("Downloading replay comments has been aborted.")
  469. return False