123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223 |
- # Copyright (c) 2017 https://github.com/ping
- #
- # This software is released under the MIT License.
- # https://opensource.org/licenses/MIT
- import argparse
- import logging
- import os
- import re
- import xml.etree.ElementTree
- import subprocess
- from contextlib import closing
- import requests
# Name the logger after the module rather than its file path: a path such as
# "/some/dir/module.py" is split on every "." by the logging hierarchy and
# cannot be configured through a package-level parent logger.
logger = logging.getLogger(__name__)

# Prefix -> namespace URI map passed to every ElementTree find()/findall()
# call made against the MPD (MPEG-DASH manifest) document.
MPD_NAMESPACE = {'mpd': 'urn:mpeg:dash:schema:mpd:2011'}
class Downloader(object):
    """Downloads and assembles a given IG live replay stream.

    The MPD (MPEG-DASH manifest) document is parsed when the instance is
    created. :meth:`download` then fetches the best audio and video
    representation of every period and, unless told otherwise, joins them
    into a single file with ffmpeg.
    """

    USER_AGENT = 'Instagram 10.26.0 (iPhone8,1; iOS 10_2; en_US; en-US; ' \
                 'scale=2.00; gamut=normal; 750x1334) AppleWebKit/420+'
    DOWNLOAD_TIMEOUT = 15
    # Number of bytes written to disk per iteration while streaming a download
    DOWNLOAD_CHUNK_SIZE = 1024 * 100

    # Time part of an ISO 8601 / xs:duration value, e.g. "PT0H1M59.89S".
    # Every component is optional so that "PT1M30S" or "PT45S" also parse.
    DURATION_PATTERN = re.compile(
        r'^PT(?:(?P<hrs>\d+)H)?(?:(?P<mins>\d+)M)?(?:(?P<secs>\d+(?:\.\d+)?)S)?$')

    def __init__(self, mpd, output_dir, user_agent=None, **kwargs):
        """
        :param mpd: contents of the MPD document (an XML string, not a URL)
        :param output_dir: folder to store the downloaded files
        :param user_agent: User-Agent header for downloads, defaults to ``USER_AGENT``
        :param kwargs:
            - **download_timeout**: request timeout in seconds,
              defaults to ``DOWNLOAD_TIMEOUT``
            - **ffmpeg_binary**: path to the ffmpeg binary, defaults to the
              ``FFMPEG_BINARY`` environment variable or ``ffmpeg``
        :return:
        """
        self.mpd = mpd
        self.output_dir = output_dir
        # An empty output_dir means "current directory"; makedirs('') would fail
        if self.output_dir and not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)
        self.user_agent = user_agent or self.USER_AGENT
        self.download_timeout = kwargs.pop('download_timeout', None) or self.DOWNLOAD_TIMEOUT

        session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(max_retries=2)
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        self.session = session

        # custom ffmpeg binary path, fallback to ffmpeg_binary path in env if available
        self.ffmpeg_binary = kwargs.pop('ffmpeg_binary', None) or os.getenv('FFMPEG_BINARY', 'ffmpeg')

        xml.etree.ElementTree.register_namespace('', MPD_NAMESPACE['mpd'])
        self.mpd_document = xml.etree.ElementTree.fromstring(self.mpd)

        duration_attribute = self.mpd_document.attrib.get('mediaPresentationDuration', '')
        duration = self._parse_duration(duration_attribute)
        if duration is None:
            logger.warning('Unable to parse duration: {}'.format(duration_attribute))
            duration = 0
        self.duration = duration

    @classmethod
    def _parse_duration(cls, duration_attribute):
        """
        Convert a duration such as ``PT0H1M59.89S`` into whole seconds.

        :param duration_attribute: value of the ``mediaPresentationDuration`` attribute
        :return: duration rounded to the nearest second, or None if the
            value cannot be parsed
        """
        mobj = cls.DURATION_PATTERN.match((duration_attribute or '').strip())
        # A bare "PT" matches the pattern but carries no information
        if not mobj or not any(mobj.groups()):
            return None
        return int(round(
            int(mobj.group('hrs') or 0) * 60 * 60 +
            int(mobj.group('mins') or 0) * 60 +
            float(mobj.group('secs') or 0)
        ))

    @staticmethod
    def _int_attribute(element, name):
        """Return attribute ``name`` of ``element`` as an int, 0 if missing or invalid."""
        try:
            return int(element.attrib.get(name, '0'))
        except (TypeError, ValueError):
            return 0

    @classmethod
    def _quality_key(cls, representation):
        """
        Sort key ranking a Representation element by quality.

        The key is a tuple of ints (pixel count, bandwidth, numeric part of
        FBQualityLabel, audio sampling rate) so that comparing two keys never
        mixes numbers with strings.

        :param representation: a ``Representation`` element
        :return: tuple of four ints, larger means better quality
        """
        pixels = cls._int_attribute(representation, 'width') * \
            cls._int_attribute(representation, 'height')
        # Labels look like "720p"; rank them by their leading number
        label = re.match(r'\d+', representation.attrib.get('FBQualityLabel') or '')
        return (
            pixels,
            cls._int_attribute(representation, 'bandwidth'),
            int(label.group(0)) if label else 0,
            cls._int_attribute(representation, 'audioSamplingRate'),
        )

    def _select_streams(self, period):
        """
        Pick the best audio and the best video stream of a period.

        :param period: a ``Period`` element
        :return: tuple ``(audio_url, video_url)``, either may be None if
            no usable stream of that kind was found
        """
        adaptation_sets = period.findall('mpd:AdaptationSet', MPD_NAMESPACE)
        if len(adaptation_sets) != 2:
            logger.warning('Unexpected number of adaptation sets: {}'.format(len(adaptation_sets)))

        audio_stream = None
        video_stream = None
        for adaptation_set in adaptation_sets:
            # sort representations by quality and pick best one
            representations = sorted(
                adaptation_set.findall('mpd:Representation', MPD_NAMESPACE),
                key=self._quality_key,
                reverse=True)
            if not representations:
                logger.warning('Adaptation set has no representations, skipping')
                continue

            representation = representations[0]
            representation_id = representation.attrib.get('id', '')
            # mimeType may be declared on the adaptation set instead of the representation
            mime_type = (
                representation.attrib.get('mimeType') or
                adaptation_set.attrib.get('mimeType', ''))
            logger.debug(
                'Selected representation with mimeType {0!s} id {1!s} out of {2!s}'.format(
                    mime_type,
                    representation_id,
                    ' / '.join([r.attrib.get('id', '') for r in representations])
                ))

            base_url = representation.find('mpd:BaseURL', MPD_NAMESPACE)
            representation_base_url = (base_url.text or '').strip() if base_url is not None else ''
            if not representation_base_url:
                logger.warning(
                    'Representation {0!s} has no BaseURL, skipping'.format(representation_id))
                continue
            logger.debug(representation_base_url)

            if 'video' in mime_type and not video_stream:
                video_stream = representation_base_url
            elif 'audio' in mime_type and not audio_stream:
                audio_stream = representation_base_url

            if audio_stream and video_stream:
                break
        return audio_stream, video_stream

    def _local_path(self, url):
        """
        Return the path inside ``output_dir`` that ``url`` is downloaded to.

        The query string is removed before the file name is extracted so
        that a "/" inside the query cannot be mistaken for a path separator.
        """
        return os.path.join(self.output_dir, os.path.basename(url.split('?')[0]))

    @staticmethod
    def _numbered_filename(output_filename, number):
        """
        Insert ``-<number>`` before the extension of ``output_filename``
        so that it looks like output-1.mp4, output-2.mp4, etc.
        """
        root, ext = os.path.splitext(output_filename)
        return '{0!s}-{1:d}{2!s}'.format(root, number, ext)

    def _fetch(self, url, path):
        """Stream ``url`` into the file at ``path``, raising on an HTTP error status."""
        logger.debug('Downloading {} as {}'.format(url, path))
        with closing(self.session.get(
                url,
                headers={'User-Agent': self.user_agent, 'Accept': '*/*'},
                timeout=self.download_timeout, stream=True)) as res:
            res.raise_for_status()
            with open(path, 'wb') as f:
                for chunk in res.iter_content(chunk_size=self.DOWNLOAD_CHUNK_SIZE):
                    if chunk:
                        f.write(chunk)

    def _mux(self, audio_file, video_file, generated_filename):
        """
        Join an audio and a video file into ``generated_filename`` with ffmpeg.

        :return: True if ffmpeg ran and exited with code 0, False otherwise
            (the failure is logged, not raised)
        """
        # Use the effective level so a DEBUG level inherited from a parent
        # logger also makes ffmpeg more verbose
        ffmpeg_loglevel = 'warning' if logger.isEnabledFor(logging.DEBUG) else 'error'
        cmd = [
            self.ffmpeg_binary, '-y',
            '-loglevel', ffmpeg_loglevel,
            '-i', audio_file,
            '-i', video_file,
            '-c:v', 'copy',
            '-c:a', 'copy',
            generated_filename]
        try:
            exit_code = subprocess.call(cmd)
        except Exception as call_err:
            # e.g. the ffmpeg binary does not exist; best effort, keep going
            logger.error('ffmpeg exited with the error: {0!s}'.format(call_err))
            logger.error('Command: {0!s}'.format(' '.join(cmd)))
            return False
        if exit_code:
            logger.error('ffmpeg exited with the code: {0!s}'.format(exit_code))
            logger.error('Command: {0!s}'.format(' '.join(cmd)))
            return False
        return True

    def download(self, output_filename,
                 skipffmpeg=False,
                 cleartempfiles=True):
        """
        Download and saves the generated file with the file name specified.

        A period without both an audio and a video stream, or whose ffmpeg
        run fails, is logged and skipped. Download errors are raised.

        :param output_filename: Output file path
        :param skipffmpeg: bool flag to not use ffmpeg to join audio and video file into final mp4
        :param cleartempfiles: bool flag to remove downloaded and temp files
        :return: list of the file paths generated by ffmpeg (empty if ``skipffmpeg``)
        """
        periods = self.mpd_document.findall('mpd:Period', MPD_NAMESPACE)
        logger.debug('Found {0:d} period(s)'.format(len(periods)))

        generated_files = []
        # According to specs, multiple periods are allowed but IG usually sends only one
        for period_idx, period in enumerate(periods):
            audio_stream, video_stream = self._select_streams(period)
            if not (audio_stream and video_stream):
                missing = ' and '.join(
                    kind for kind, url in (('audio', audio_stream), ('video', video_stream))
                    if not url)
                logger.error('Period {0:d} has no usable {1!s} stream, skipping'.format(
                    period_idx + 1, missing))
                continue

            audio_file = self._local_path(audio_stream)
            video_file = self._local_path(video_stream)
            for url, path in ((audio_stream, audio_file), (video_stream, video_file)):
                self._fetch(url, path)

            if skipffmpeg:
                # The raw audio/video files are the result, leave them in place
                continue

            if len(periods) > 1:
                generated_filename = self._numbered_filename(output_filename, period_idx + 1)
            else:
                generated_filename = output_filename

            if not self._mux(audio_file, video_file, generated_filename):
                continue

            generated_files.append(generated_filename)
            logger.debug('Generated {}'.format(generated_filename))

            if cleartempfiles:
                for f in (audio_file, video_file):
                    try:
                        os.remove(f)
                    except (IOError, OSError) as ioe:
                        logger.warning('Error removing {0!s}: {1!s}'.format(f, str(ioe)))

        return generated_files
if __name__ == '__main__':      # pragma: no cover
    # pylint: disable-all
    # Example of how to init and start the Downloader:
    # reads an MPD document from a local file, downloads the streams it
    # describes and prints the duration and the generated file names.
    parser = argparse.ArgumentParser()
    parser.add_argument('mpd')
    parser.add_argument('-v', action='store_true', help='Verbose')
    parser.add_argument('-s', metavar='OUTPUT_FILENAME', required=True,
                        help='Output filename')
    parser.add_argument('-o', metavar='DOWNLOAD_DIR',
                        default='output/', help='Download folder')
    parser.add_argument('-c', action='store_true', help='Clear temp files')
    args = parser.parse_args()

    logger.setLevel(logging.DEBUG if args.v else logging.INFO)
    logging.basicConfig(level=logger.level)

    # The positional argument is a path to a file holding the MPD XML;
    # the Downloader expects the document contents, not the path.
    with open(args.mpd, 'r') as mpd_file:
        mpd_contents = mpd_file.read()

    dl = Downloader(mpd=mpd_contents, output_dir=args.o)
    try:
        generated_files = dl.download(args.s, cleartempfiles=args.c)
        print('Video Duration: %s' % dl.duration)
        print('Generated files: \n%s' % '\n'.join(generated_files))
    except KeyboardInterrupt:
        logger.info('Interrupted')
|