Forráskód Böngészése

Attempt to add comment downloading, some text layout improvements

notcammy 7 éve
szülő
commit
169b7f9dde
4 módosított fájl, 263 hozzáadás és 23 törlés
  1. 139 0
      pyinstalive/comments.py
  2. 99 20
      pyinstalive/downloader.py
  3. 23 2
      pyinstalive/initialize.py
  4. 2 1
      pyinstalive/settings.py

+ 139 - 0
pyinstalive/comments.py

@@ -0,0 +1,139 @@
+# -*- coding: utf-8 -*-
+
+import time
+import json
+import codecs
+import sys
+from socket import timeout, error as SocketError
+from ssl import SSLError
+try:
+	# py2
+	from urllib2 import URLError
+	from httplib import HTTPException
+except ImportError:
+	# py3
+	from urllib.error import URLError
+	from http.client import HTTPException
+
+from instagram_private_api import ClientError
+from .logger import log, seperator
+
+"""
+This feature of PyInstaLive was originally written by https://github.com/taengstagram
+The code below and in downloader.py that's related to the comment downloading
+feature is modified by https://github.com/notcammy
+"""
+
+
+class CommentsDownloader(object):
+	"""Collect the comments of a broadcast and persist them to disk.
+
+	Collected comments are kept in ``self.comments`` and written, together
+	with the broadcast dict, as JSON to ``destination_file``.
+	"""
+
+	def __init__(self, api, broadcast, destination_file):
+		"""Remember the API client, the broadcast dict and the JSON output path."""
+		self.api = api
+		self.broadcast = broadcast
+		self.destination_file = destination_file
+		self.comments = []
+
+	def get_live(self, first_comment_created_at=0):
+		"""Fetch one batch of live comments and store it.
+
+		Network errors and the ClientError cases handled below are logged as
+		warnings; any other ClientError is re-raised. The call always sleeps
+		4 seconds before returning.
+
+		Returns the timestamp to pass to the next call: ``created_at_utc`` of
+		the first comment in the batch, "now - 5s" when the batch is empty, or
+		the unchanged argument when the request failed.
+		"""
+		try:
+			comments_res = self.api.broadcast_comments(
+				self.broadcast['id'], last_comment_ts=first_comment_created_at)
+			comments = comments_res.get('comments', [])
+			first_comment_created_at = (
+				comments[0]['created_at_utc'] if comments else int(time.time() - 5))
+			if comments:
+				self.comments.extend(comments)
+				# Rewrite the JSON file only when something new arrived.
+				self.save()
+		except (SSLError, timeout, URLError, HTTPException, SocketError) as e:
+			log('[W] Comment collection error: %s' % e, "YELLOW")
+		except ClientError as e:
+			if e.code == 500 or (e.code == 400 and not e.msg):
+				log('[W] Comment collection ClientError: %d %s' % (e.code, e.error_response), "YELLOW")
+			else:
+				# Bare raise keeps the original traceback (also on Python 2).
+				raise
+		finally:
+			time.sleep(4)
+		return first_comment_created_at
+
+	def get_replay(self):
+		"""Download all comments of a replay and write them to the JSON file.
+
+		Pages through ``replay_broadcast_comments`` until a page has no
+		comments or no ``ending_offset``, pausing 4 seconds between pages.
+		When any comment was found, ``self.broadcast`` itself receives the
+		'comments' and 'initial_buffered_duration' keys before being dumped.
+		"""
+		comments_collected = []
+		starting_offset = 0
+		encoding_tag = self.broadcast['encoding_tag']
+		while True:
+			comments_res = self.api.replay_broadcast_comments(
+				self.broadcast['id'], starting_offset=starting_offset, encoding_tag=encoding_tag)
+			starting_offset = comments_res.get('ending_offset', 0)
+			comments = comments_res.get('comments', [])
+			comments_collected.extend(comments)
+			if not comments or not starting_offset:
+				break
+			time.sleep(4)
+
+		if comments_collected:
+			self.broadcast['comments'] = comments_collected
+			self.broadcast['initial_buffered_duration'] = 0
+			with open(self.destination_file, 'w') as outfile:
+				json.dump(self.broadcast, outfile, indent=2)
+		self.comments = comments_collected
+
+	def save(self):
+		"""Dump the broadcast (without 'segments') plus all comments as JSON."""
+		broadcast = self.broadcast.copy()
+		broadcast.pop('segments', None)     # save space
+		broadcast['comments'] = self.comments
+		with open(self.destination_file, 'w') as outfile:
+			json.dump(broadcast, outfile, indent=2)
+
+	@staticmethod
+	def generate_log(comments, download_start_time, srt_file, comments_delay=10.0):
+		"""Write the comments as a timestamped text log to ``srt_file``.
+
+		comments            -- list of comment dicts; entries carrying an
+		                       'offset' key are flattened in place (their
+		                       nested 'comment' dict is merged into them).
+		download_start_time -- epoch seconds the timestamps are relative to
+		                       (a number or a numeric string).
+		srt_file            -- path of the log file to write (UTF-8 with BOM).
+		comments_delay      -- seconds added to every timestamp.
+
+		Nothing is written when there are no comments.
+		"""
+		if not comments:
+			return
+
+		start_time = int(download_start_time)
+		delay = int(comments_delay)
+		is_py2 = sys.version_info[0] == 2
+
+		# Group comments into 2-second buckets. Integer keys sort numerically
+		# and stay valid even when a timestamp/offset is a float.
+		timeline = {}
+		for c in comments:
+			if 'offset' in c:
+				c.update(c['comment'])
+				c['created_at_utc'] = start_time + c['offset']
+			bucket = 2 * (int(c['created_at_utc']) // 2)
+			timeline.setdefault(bucket, []).append(c)
+
+		lines = []
+		for bucket in sorted(timeline):
+			# Comments older than the start of the recording are pinned to 0.
+			clip_start = max(bucket - start_time + delay, 0)
+			stamp = time.strftime('%H:%M:%S\n', time.gmtime(clip_start))
+			for c in timeline[bucket]:
+				text = c['text']
+				if is_py2:
+					# Python 2 cannot mix unicode text into a byte str
+					# safely, so non-ASCII becomes XML character references.
+					text = text.encode('ascii', 'xmlcharrefreplace')
+				user = c['user']
+				if user.get('is_verified'):
+					author = '{} {}'.format(user['username'], "(v)")
+				else:
+					author = user['username']
+				lines.append('{}{}: {}\n\n'.format(stamp, author, text))
+
+		with codecs.open(srt_file, 'w', 'utf-8-sig') as srt_outfile:
+			srt_outfile.write(''.join(lines))

+ 99 - 20
pyinstalive/downloader.py

@@ -6,7 +6,10 @@ import subprocess
 import threading
 
 from instagram_private_api_extensions import live, replay
+from instagram_private_api import ClientError
+
 from .logger import log, seperator
+from .comments import CommentsDownloader
 
 class NoLivestreamException(Exception):
 	pass
@@ -47,13 +50,14 @@ def get_stream_duration(broadcast):
 
 def record_stream(broadcast):
 	try:
-		def print_status():
+		def print_status(sep=True):
 			heartbeat_info = api.broadcast_heartbeat_and_viewercount(broadcast['id'])
 			viewers = broadcast.get('viewer_count', 0)
 			log('[I] Viewers     : ' + str(int(viewers)) + " watching", "GREEN")
 			log('[I] Airing time : ' + get_stream_duration(broadcast).title(), "GREEN")
 			log('[I] Status      : ' + heartbeat_info['broadcast_status'].title(), "GREEN")
-			seperator("GREEN")
+			if sep:
+				seperator("GREEN")
 			return heartbeat_info['broadcast_status'] not in ['active', 'interrupted'] 
 
 		mpd_url = (broadcast.get('dash_manifest')
@@ -72,15 +76,17 @@ def record_stream(broadcast):
 			mpd_download_timeout=5,
 			download_timeout=10)
 	except Exception as e:
-		log('[E] Could not start recording livestream: ' + str(e), "RED")
+		log('[E] Could not start downloading livestream: ' + str(e), "RED")
 		seperator("GREEN")
 		sys.exit(1)
 	try:
-		log('[I] Starting livestream recording:', "GREEN")
+		seperator("GREEN")
+		log('[I] Livestream downloading started...', "GREEN")
 		log('[I] Username    : ' + record, "GREEN")
+		print_status(False)
 		log('[I] MPD URL     : ' + mpd_url, "GREEN")
-		print_status()
-		log('[I] Recording livestream... press [CTRL+C] to abort.', "GREEN")
+		seperator("GREEN")
+		log('[I] Downloading livestream... press [CTRL+C] to abort.', "GREEN")
 
 		if (settings.run_at_start is not "None"):
 			try:
@@ -91,18 +97,35 @@ def record_stream(broadcast):
 			except Exception as e:
 				log('[W] Could not run file: ' + str(e), "YELLOW")
 
+
+
+		# Bind the name unconditionally: stitch_video() receives it below even
+		# when comment saving is disabled or the thread could not be created.
+		comment_thread_worker = None
+		if settings.save_comments.title() == "True":
+			try:
+				comments_json_file = settings.save_path + '{}_{}_{}_{}_live_comments.json'.format(settings.current_date, record, broadcast['id'], settings.current_time)
+				comment_thread_worker = threading.Thread(target=get_live_comments, args=(api, broadcast, comments_json_file, dl,))
+				comment_thread_worker.start()
+			except Exception as e:
+				# str() is required; concatenating the exception object raises TypeError.
+				log('[E] An error occurred while checking comments: ' + str(e), "RED")
+
+
+
 		dl.run()
 		log('[I] The livestream has ended. (Duration: ' + get_stream_duration(broadcast) + ")", "GREEN")
-		stitch_video(dl, broadcast)
+
+		stitch_video(dl, broadcast, comment_thread_worker)
 	except KeyboardInterrupt:
-		log("", "GREEN")
+		seperator("GREEN")
 		log('[W] Download has been aborted.', "YELLOW")
-		log("", "GREEN")
+		seperator("GREEN")
 		if not dl.is_aborted:
 			dl.stop()
-			stitch_video(dl, broadcast)
+			stitch_video(dl, broadcast, comment_thread_worker)
 
-def stitch_video(dl, broadcast):
+def stitch_video(dl, broadcast, comment_thread_worker):
+	if comment_thread_worker and comment_thread_worker.is_alive():
+		log("[I] Ending comment saving process...", "GREEN")
+		comment_thread_worker.join()
 	if (settings.run_at_finish is not "None"):
 		try:
 			thread = threading.Thread(target=run_script, args=(settings.run_at_finish,))
@@ -110,7 +133,7 @@ def stitch_video(dl, broadcast):
 			thread.start()
 			log("[I] Executed file to run at finish.", "GREEN")
 		except Exception as e:
-			log('[W] Could not run file: ' + str(e), "YELLOW")
+			# str() is required; concatenating the exception object raises TypeError.
+			log('[W] Could not run file: ' + str(e), "YELLOW")
 
 	log('[I] Stitching downloaded files into video...', "GREEN")
 	output_file = settings.save_path + '{}_{}_{}_{}_live.mp4'.format(settings.current_date, record, broadcast['id'], settings.current_time)
@@ -119,7 +142,7 @@ def stitch_video(dl, broadcast):
 			dl.stitch(output_file, cleartempfiles=True)
 		else:
 			dl.stitch(output_file, cleartempfiles=False)
-		log('[I] Successfully stitched downloaded files.', "GREEN")
+		log('[I] Successfully stitched downloaded files into video.', "GREEN")
 		seperator("GREEN")
 		sys.exit(0)
 	except Exception as e:
@@ -129,7 +152,7 @@ def stitch_video(dl, broadcast):
 
 def get_user_info(record):
 	try:
-		log("[I] Checking user: '"+ record + "'", "GREEN")
+		log('[I] Checking user "' + record + '"...', "GREEN")
 		user_res = api.username_info(record)
 		user_id = user_res['user']['pk']
 	except Exception as e:
@@ -147,6 +170,7 @@ def get_user_info(record):
 
 def get_livestreams(user_id):
 	try:
+		seperator("GREEN")
 		log('[I] Checking for ongoing livestreams...', "GREEN")
 		broadcast = api.user_broadcast(user_id)
 		if (broadcast is None):
@@ -154,7 +178,7 @@ def get_livestreams(user_id):
 		else:
 			record_stream(broadcast)
 	except NoLivestreamException as e:
-		log('[W] ' + str(e), "YELLOW")
+		log('[I] ' + str(e), "BLUE")
 	except Exception as e:
 		if (e.__class__.__name__ is not NoLivestreamException):
 			log('[E] Could not get livestreams info: ' + str(e), "RED")
@@ -164,6 +188,7 @@ def get_livestreams(user_id):
 
 def get_replays(user_id):
 	try:
+		seperator("GREEN")
 		log('[I] Checking for available replays...', "GREEN")
 		user_story_feed = api.user_story_feed(user_id)
 		broadcasts = user_story_feed.get('post_live_item', {}).get('broadcasts', [])
@@ -197,14 +222,21 @@ def get_replays(user_id):
 					dl = replay.Downloader(
 						mpd=broadcast['dash_manifest'],
 						output_dir=output_dir,
-						user_agent=api.user_agent)
+						user_agent=api.user_agent)	
+
+
 					if settings.clear_temp_files.title() == "True":
 						replay_saved = dl.download(settings.save_path + '{}_{}_{}_{}_replay.mp4'.format(settings.current_date, record, broadcast['id'], settings.current_time), cleartempfiles=True)
 					else:
 						replay_saved = dl.download(settings.save_path + '{}_{}_{}_{}_replay.mp4'.format(settings.current_date, record, broadcast['id'], settings.current_time), cleartempfiles=False)
 					if (len(replay_saved) == 1):
 						log("[I] Finished downloading replay " + str(current) + " of "  + str(len(broadcasts)) + ".", "GREEN")
-						log("", "GREEN")
+						seperator("GREEN")
+						# Nested inside the success branch so the `else` below stays
+						# paired with the replay_saved check, not with this setting.
+						if settings.save_comments.title() == "True":
+							log("[I] Checking for available comments to save...", "GREEN")
+							comments_json_file = settings.save_path + '{}_{}_{}_{}_replay_comments.json'.format(settings.current_date, record, broadcast['id'], settings.current_time)
+							get_replay_comments(api, broadcast, comments_json_file, dl)
+
 					else:
 						log("[W] No output video file was made, please merge the files manually.", "RED")
 						log("[W] Check if ffmpeg is available by running ffmpeg in your terminal.", "RED")
@@ -213,7 +245,7 @@ def get_replays(user_id):
 		seperator("GREEN")
 		sys.exit(0)
 	except NoReplayException as e:
-		log('[W] ' + str(e), "YELLOW")
+		log('[I] ' + str(e), "BLUE")
 		seperator("GREEN")
 		sys.exit(0)
 	except Exception as e:
@@ -222,10 +254,57 @@ def get_replays(user_id):
 		sys.exit(1)
 	except KeyboardInterrupt:
 		log("", "GREEN")
-		log('[W] Download has been aborted.', "YELLOW")
+		log('[W] Download has been aborted by the user.', "YELLOW")
 		try:
 			shutil.rmtree(output_dir)
 		except Exception as e:
 			log("[E] Could not remove temp folder: " + str(e), "RED")
 			sys.exit(1)
-		sys.exit(0)
+		sys.exit(0)
+
+
+def get_replay_comments(api, broadcast, comments_json_file, dl):
+	"""Download the comments of a replay and write the JSON and log files.
+
+	The log file path is ``comments_json_file`` with '.json' replaced by
+	'.log'. ``dl`` is accepted but not used here.
+	"""
+	downloader = CommentsDownloader(
+		api=api, broadcast=broadcast, destination_file=comments_json_file)
+	downloader.get_replay()
+
+	# Nothing collected: report it and stop early.
+	if not downloader.comments:
+		log("[I] There are no available comments to save.", "GREEN")
+		seperator("GREEN")
+		return
+
+	log_path = comments_json_file.replace('.json', '.log')
+	CommentsDownloader.generate_log(
+		downloader.comments, broadcast['published_time'], log_path,
+		comments_delay=0)
+	log("[I] Successfully saved comments to logfile.", "GREEN")
+	seperator("GREEN")
+
+def get_live_comments(api, broadcast, comments_json_file, dl):
+	"""Poll live comments until the download is aborted, then save them.
+
+	Runs as the target of the comment thread. Polling stops when
+	``dl.is_aborted`` becomes true or a ClientError escapes ``get_live``;
+	afterwards the JSON file and the '.log' file are written when any
+	comment was collected.
+	"""
+	downloader = CommentsDownloader(
+		api=api, broadcast=broadcast, destination_file=comments_json_file)
+	last_timestamp = 0
+	try:
+		while not dl.is_aborted:
+			if 'initial_buffered_duration' not in broadcast:
+				if dl.initial_buffered_duration:
+					# Record the buffered duration once it is known.
+					broadcast['initial_buffered_duration'] = dl.initial_buffered_duration
+					downloader.broadcast = broadcast
+			last_timestamp = downloader.get_live(last_timestamp)
+	except ClientError as e:
+		if 'media has been deleted' not in e.error_response:
+			log("[W] Comment collection ClientError: %d %s" % (e.code, e.error_response), "YELLOW")
+
+	try:
+		if not downloader.comments:
+			log("[I] There are no available comments to save.", "GREEN")
+			seperator("GREEN")
+		else:
+			log("[I] Checking for available comments to save...", "GREEN")
+			downloader.save()
+			log_path = comments_json_file.replace('.json', '.log')
+			CommentsDownloader.generate_log(
+				downloader.comments, settings.current_time, log_path,
+				comments_delay=dl.initial_buffered_duration)
+			log("[I] Successfully saved comments to logfile.", "GREEN")
+			seperator("GREEN")
+	except Exception as e:
+		log('[E] Could not save comments to logfile: ' + str(e), "RED")

+ 23 - 2
pyinstalive/initialize.py

@@ -50,6 +50,7 @@ def check_config_validity(config):
 			settings.clear_temp_files = 'true'
 
 
+
 		try:
 			settings.save_replays = config.get('pyinstalive', 'save_replays').title()
 			if not settings.save_replays in bool_values:
@@ -59,9 +60,11 @@ def check_config_validity(config):
 			log("[W] Invalid or missing setting detected for 'save_replays', using default value (True)", "YELLOW")
 			settings.save_replays = 'true'
 
+
+
 		try:
 			settings.run_at_start = config.get('pyinstalive', 'run_at_start')
-			if (len(settings.run_at_start) > 0):
+			if (settings.run_at_start):
 				if not os.path.isfile(settings.run_at_start):
 					log("[W] Path to file given for 'run_at_start' does not exist, using default value (None)", "YELLOW")
 					settings.run_at_start = "None"
@@ -69,13 +72,17 @@ def check_config_validity(config):
 					if not settings.run_at_start.split('.')[-1] == 'py':
 						log("[W] File given for 'run_at_start' is not a Python script, using default value (None)", "YELLOW")
 						settings.run_at_start = "None"
+			else:
+				settings.run_at_start = "None"
 		except:
 			log("[W] Invalid or missing settings detected for 'run_at_start', using default value (None)", "YELLOW")
 			settings.run_at_start = "None"
 
+
+
 		try:
 			settings.run_at_finish = config.get('pyinstalive', 'run_at_finish')
-			if (len(settings.run_at_finish) > 0):
+			if (settings.run_at_finish):
 				if not os.path.isfile(settings.run_at_finish):
 					log("[W] Path to file given for 'run_at_finish' does not exist, using default value (None)", "YELLOW")
 					settings.run_at_finish = "None"
@@ -83,10 +90,24 @@ def check_config_validity(config):
 					if not settings.run_at_finish.split('.')[-1] == 'py':
 						log("[W] File given for 'run_at_finish' is not a Python script, using default value (None)", "YELLOW")
 						settings.run_at_finish = "None"
+			else:
+				settings.run_at_finish = "None"
+
 		except:
 			log("[W] Invalid or missing settings detected for 'run_at_finish', using default value (None)", "YELLOW")
 			settings.run_at_finish = "None"
 
+
+		# Read 'save_comments' and validate that same value; an invalid or
+		# missing entry falls back to the default (True).
+		try:
+			settings.save_comments = config.get('pyinstalive', 'save_comments').title()
+			if settings.save_comments not in bool_values:
+				log("[W] Invalid or missing setting detected for 'save_comments', using default value (True)", "YELLOW")
+				settings.save_comments = 'true'
+		except:
+			log("[W] Invalid or missing setting detected for 'save_comments', using default value (True)", "YELLOW")
+			settings.save_comments = 'true'
+
+
 		try:
 			settings.save_path = config.get('pyinstalive', 'save_path')
 

+ 2 - 1
pyinstalive/settings.py

@@ -10,4 +10,5 @@ class settings:
 	current_date = time.strftime("%Y%m%d")
 	save_replays = "true"
 	run_at_start = "None"
-	run_at_finish = "None"
+	run_at_finish = "None"
+	save_comments = "true"