# replay_dl.py
#
# Copyright (c) 2017 https://github.com/ping
#
# This software is released under the MIT License.
# https://opensource.org/licenses/MIT
  5. import argparse
  6. import logging
  7. import os
  8. import re
  9. import xml.etree.ElementTree
  10. import subprocess
  11. from contextlib import closing
  12. import requests
  13. logger = logging.getLogger(__file__)
  14. MPD_NAMESPACE = {'mpd': 'urn:mpeg:dash:schema:mpd:2011'}
  15. class Downloader(object):
  16. """Downloads and assembles a given IG live replay stream"""
  17. USER_AGENT = 'Instagram 10.26.0 (iPhone8,1; iOS 10_2; en_US; en-US; ' \
  18. 'scale=2.00; gamut=normal; 750x1334) AppleWebKit/420+'
  19. DOWNLOAD_TIMEOUT = 15
  20. def __init__(self, mpd, output_dir, user_agent=None, **kwargs):
  21. """
  22. :param mpd: URL to mpd
  23. :param output_dir: folder to store the downloaded files
  24. :return:
  25. """
  26. self.mpd = mpd
  27. self.output_dir = output_dir
  28. if not os.path.exists(self.output_dir):
  29. os.makedirs(self.output_dir)
  30. self.user_agent = user_agent or self.USER_AGENT
  31. self.download_timeout = kwargs.pop('download_timeout', None) or self.DOWNLOAD_TIMEOUT
  32. session = requests.Session()
  33. adapter = requests.adapters.HTTPAdapter(max_retries=2)
  34. session.mount('http://', adapter)
  35. session.mount('https://', adapter)
  36. self.session = session
  37. # custom ffmpeg binary path, fallback to ffmpeg_binary path in env if available
  38. self.ffmpeg_binary = kwargs.pop('ffmpeg_binary', None) or os.getenv('FFMPEG_BINARY', 'ffmpeg')
  39. xml.etree.ElementTree.register_namespace('', MPD_NAMESPACE['mpd'])
  40. self.mpd_document = xml.etree.ElementTree.fromstring(self.mpd)
  41. duration_attribute = self.mpd_document.attrib.get('mediaPresentationDuration', '')
  42. mobj = re.match(r'PT(?P<hrs>\d+)H(?P<mins>\d+)M(?P<secs>\d+\.\d+)', duration_attribute)
  43. if mobj:
  44. duration = int(round(
  45. int(mobj.group('hrs')) * 60 * 60 +
  46. int(mobj.group('mins')) * 60 +
  47. float(mobj.group('secs'))
  48. ))
  49. else:
  50. logger.warning('Unable to parse duration: {}'.format(duration_attribute))
  51. duration = 0
  52. self.duration = duration
  53. def download(self, output_filename,
  54. skipffmpeg=False,
  55. cleartempfiles=True):
  56. """
  57. Download and saves the generated file with the file name specified.
  58. :param output_filename: Output file path
  59. :param skipffmpeg: bool flag to not use ffmpeg to join audio and video file into final mp4
  60. :param cleartempfiles: bool flag to remove downloaded and temp files
  61. :return:
  62. """
  63. periods = self.mpd_document.findall('mpd:Period', MPD_NAMESPACE)
  64. logger.debug('Found {0:d} period(s)'.format(len(periods)))
  65. generated_files = []
  66. # Aaccording to specs, multiple periods are allow but IG only sends one usually
  67. for period_idx, period in enumerate(periods):
  68. adaptation_sets = period.findall('mpd:AdaptationSet', MPD_NAMESPACE)
  69. audio_stream = None
  70. video_stream = None
  71. if not len(adaptation_sets) == 2:
  72. logger.warning('Unexpected number of adaptation sets: {}'.format(len(adaptation_sets)))
  73. for adaptation_set in adaptation_sets:
  74. representations = adaptation_set.findall('mpd:Representation', MPD_NAMESPACE)
  75. # sort representations by quality and pick best one
  76. representations = sorted(
  77. representations,
  78. key=lambda rep: (
  79. (int(rep.attrib.get('width', '0')) * int(rep.attrib.get('height', '0'))) or
  80. int(rep.attrib.get('bandwidth', '0')) or
  81. rep.attrib.get('FBQualityLabel') or
  82. int(rep.attrib.get('audioSamplingRate', '0'))),
  83. reverse=True)
  84. representation = representations[0]
  85. representation_id = representation.attrib.get('id', '')
  86. mime_type = representation.attrib.get('mimeType', '')
  87. logger.debug(
  88. 'Selected representation with mimeType {0!s} id {1!s} out of {2!s}'.format(
  89. mime_type,
  90. representation_id,
  91. ' / '.join([r.attrib.get('id', '') for r in representations])
  92. ))
  93. representation_base_url = representation.find('mpd:BaseURL', MPD_NAMESPACE).text
  94. logger.debug(representation_base_url)
  95. if 'video' in mime_type and not video_stream:
  96. video_stream = representation_base_url
  97. elif 'audio' in mime_type and not audio_stream:
  98. audio_stream = representation_base_url
  99. if audio_stream and video_stream:
  100. break
  101. audio_file = (os.path.join(self.output_dir, os.path.basename(audio_stream))).split('?')[0]
  102. video_file = (os.path.join(self.output_dir, os.path.basename(video_stream))).split('?')[0]
  103. for target in ((audio_stream, audio_file), (video_stream, video_file)):
  104. logger.debug('Downloading {} as {}'.format(*target))
  105. with closing(self.session.get(
  106. target[0],
  107. headers={'User-Agent': self.user_agent, 'Accept': '*/*'},
  108. timeout=self.download_timeout, stream=True)) as res:
  109. res.raise_for_status()
  110. with open(target[1], 'wb') as f:
  111. for chunk in res.iter_content(chunk_size=1024*100):
  112. f.write(chunk)
  113. if skipffmpeg:
  114. continue
  115. if len(periods) > 1:
  116. # Generate a new filename by appending n+1
  117. # to the original specified output filename
  118. # so that it looks like output-1.mp4, output-2.mp4, etc
  119. dir_name = os.path.dirname(output_filename)
  120. file_name = os.path.basename(output_filename)
  121. dot_pos = file_name.rfind('.')
  122. if dot_pos >= 0:
  123. filename_no_ext = file_name[0:dot_pos]
  124. ext = file_name[dot_pos:]
  125. else:
  126. filename_no_ext = file_name
  127. ext = ''
  128. generated_filename = os.path.join(
  129. dir_name, '{0!s}-{1:d}{2!s}'.format(filename_no_ext, period_idx + 1, ext))
  130. else:
  131. generated_filename = output_filename
  132. ffmpeg_loglevel = 'error'
  133. if logger.level == logging.DEBUG:
  134. ffmpeg_loglevel = 'warning'
  135. cmd = [
  136. self.ffmpeg_binary, '-y',
  137. '-loglevel', ffmpeg_loglevel,
  138. '-i', audio_file,
  139. '-i', video_file,
  140. '-c:v', 'copy',
  141. '-c:a', 'copy',
  142. generated_filename]
  143. try:
  144. exit_code = subprocess.call(cmd)
  145. if exit_code:
  146. logger.error('ffmpeg exited with the code: {0!s}'.format(exit_code))
  147. logger.error('Command: {0!s}'.format(' '.join(cmd)))
  148. continue
  149. except Exception as call_err:
  150. logger.error('ffmpeg exited with the error: {0!s}'.format(call_err))
  151. logger.error('Command: {0!s}'.format(' '.join(cmd)))
  152. continue
  153. generated_files.append(generated_filename)
  154. logger.debug('Generated {}'.format(generated_filename))
  155. if cleartempfiles:
  156. for f in (audio_file, video_file):
  157. try:
  158. os.remove(f)
  159. except (IOError, OSError) as ioe:
  160. logger.warning('Error removing {0!s}: {1!s}'.format(f, str(ioe)))
  161. return generated_files
  162. if __name__ == '__main__': # pragma: no cover
  163. # pylint: disable-all
  164. # Example of how to init and start the Downloader
  165. parser = argparse.ArgumentParser()
  166. parser.add_argument('mpd')
  167. parser.add_argument('-v', action='store_true', help='Verbose')
  168. parser.add_argument('-s', metavar='OUTPUT_FILENAME', required=True,
  169. help='Output filename')
  170. parser.add_argument('-o', metavar='DOWLOAD_DIR',
  171. default='output/', help='Download folder')
  172. parser.add_argument('-c', action='store_true', help='Clear temp files')
  173. args = parser.parse_args()
  174. if args.v:
  175. logger.setLevel(logging.DEBUG)
  176. else:
  177. logger.setLevel(logging.INFO)
  178. logging.basicConfig(level=logger.level)
  179. with open(args.mpd, 'r') as mpd_file:
  180. mpd_contents = mpd_file.read()
  181. dl = Downloader(mpd=mpd_contents, output_dir=args.o)
  182. try:
  183. generated_files = dl.download(args.s, cleartempfiles=args.c)
  184. print('Video Duration: %s' % dl.duration)
  185. print('Generated files: \n%s' % '\n'.join(generated_files))
  186. except KeyboardInterrupt as e:
  187. logger.info('Interrupted')