From 04eb63b4cdb1feebd5f19189370ab9e1b9c39663 Mon Sep 17 00:00:00 2001
From: kermit
Date: Tue, 29 Sep 2020 19:36:26 +0100
Subject: [PATCH 1/3] Set a timeout and use session connection pool for getting file sizes

---
 deemix/api/deezer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/deemix/api/deezer.py b/deemix/api/deezer.py
index aa322e3..3c31047 100755
--- a/deemix/api/deezer.py
+++ b/deemix/api/deezer.py
@@ -81,7 +81,9 @@ class Deezer:
 
     def get_track_filesizes(self, sng_id):
         try:
-            response = requests.post("https://www.deezer.com/", headers=self.http_headers)
+            response = requests.post("https://www.deezer.com/",
+                                     headers=self.http_headers,
+                                     timeout=30)
             guest_sid = response.cookies.get('sid')
             site = requests.post(
                 "https://api.deezer.com/1.0/gateway.php",

From bb989539639adf87ec2062edce473855ed458146 Mon Sep 17 00:00:00 2001
From: kermit
Date: Tue, 29 Sep 2020 19:39:17 +0100
Subject: [PATCH 2/3] Fix streamTrack

Fixes several cases:
- When connection error thrown on retry, and subsequent retry started from the beginning, not the partial start
- Clone headers instead of adding Range header to dz.http_headers variables - this was causing errors on other requests
- Use 'with self.dz.session.get' to make sure request cleaned up on failure
- Fix calculation of downloadPercentage for retries: complete is now the size of the retry, so percentage needs to account for start value
---
 deemix/app/downloadjob.py | 75 ++++++++++++++++++++++-----------------
 1 file changed, 43 insertions(+), 32 deletions(-)

diff --git a/deemix/app/downloadjob.py b/deemix/app/downloadjob.py
index eb0372c..eb3bed2 100644
--- a/deemix/app/downloadjob.py
+++ b/deemix/app/downloadjob.py
@@ -642,43 +642,54 @@ class DownloadJob:
 
         return error_num # fallback is enabled and loop went through all formats
 
-    def streamTrack(self, stream, track, range=None):
+    def streamTrack(self, stream, track, start=0):
         if self.queueItem.cancel: raise DownloadCancelled
 
-        try:
-            headers=self.dz.http_headers
-            if range is not None:
-                headers['Range'] = range
-            request = self.dz.session.get(track.downloadUrl, headers=self.dz.http_headers, stream=True, timeout=10)
-        except request_exception.ConnectionError:
-            eventlet.sleep(2)
-            return self.streamTrack(stream, track)
-        request.raise_for_status()
-        blowfish_key = str.encode(self.dz._get_blowfish_key(str(track.id)))
-        complete = int(request.headers["Content-Length"])
-        if complete == 0:
-            raise DownloadEmpty
-        chunkLength = 0
+        headers=dict(self.dz.http_headers)
+        if start != 0:  # only a resumed download needs a Range header
+            headers['Range'] = f'bytes={start}-'
+        chunkLength = start
         percentage = 0
+
         try:
-            for chunk in request.iter_content(2048 * 3):
-                eventlet.sleep(0)
-                if self.queueItem.cancel: raise DownloadCancelled
-                if len(chunk) >= 2048:
-                    chunk = Blowfish.new(blowfish_key, Blowfish.MODE_CBC, b"\x00\x01\x02\x03\x04\x05\x06\x07").decrypt(chunk[0:2048]) + chunk[2048:]
-                stream.write(chunk)
-                chunkLength += len(chunk)
-                if isinstance(self.queueItem, QISingle):
-                    percentage = (chunkLength / complete) * 100
-                    self.downloadPercentage = percentage
+            with self.dz.session.get(track.downloadUrl, headers=headers, stream=True, timeout=10) as request:
+                request.raise_for_status()
+
+                blowfish_key = str.encode(self.dz._get_blowfish_key(str(track.id)))
+
+                complete = int(request.headers["Content-Length"])
+                if complete == 0:
+                    raise DownloadEmpty
+                if start != 0:
+                    responseRange = request.headers["Content-Range"]
+                    logger.info(f'{track.title} downloading range {responseRange}')
                 else:
-                    chunkProgres = (len(chunk) / complete) / self.queueItem.size * 100
-                    self.downloadPercentage += chunkProgres
-                self.updatePercentage()
-        except SSLError:
-            range = f'bytes={chunkLength}-'
-            logger.info(f'retrying {track.title} with range {range}')
-            return self.streamTrack(stream, track, range)
+                    logger.info(f'{track.title} downloading {complete} bytes')
+
+                for chunk in request.iter_content(2048 * 3):
+                    if self.queueItem.cancel: raise DownloadCancelled
+
+                    if len(chunk) >= 2048:
+                        chunk = Blowfish.new(blowfish_key, Blowfish.MODE_CBC, b"\x00\x01\x02\x03\x04\x05\x06\x07").decrypt(chunk[0:2048]) + chunk[2048:]
+
+                    stream.write(chunk)
+                    chunkLength += len(chunk)
+
+                    if isinstance(self.queueItem, QISingle):
+                        percentage = (chunkLength / (complete + start)) * 100
+                        self.downloadPercentage = percentage
+                    else:
+                        chunkProgres = (len(chunk) / (complete + start)) / self.queueItem.size * 100
+                        self.downloadPercentage += chunkProgres
+
+                    self.updatePercentage()
+
+        except SSLError as e:
+            logger.info(f'retrying {track.title} from byte {chunkLength}')
+            return self.streamTrack(stream, track, chunkLength)
+        except (request_exception.ConnectionError, requests.exceptions.ReadTimeout):
+            eventlet.sleep(2)
+            return self.streamTrack(stream, track, chunkLength)  # resume after bytes already written to stream
 
     def updatePercentage(self):
         if round(self.downloadPercentage) != self.lastPercentage and round(self.downloadPercentage) % 2 == 0:

From 1969d3ca7ebdc0afc3232df4c4c0eb9265284513 Mon Sep 17 00:00:00 2001
From: kermit
Date: Tue, 29 Sep 2020 19:49:12 +0100
Subject: [PATCH 3/3] Use HEAD not GET to test track url, and send User-Agent

Means we can avoid stream=True and the issues around returning the connection to the pool detailed here:
https://requests.readthedocs.io/en/latest/user/advanced/#body-content-workflow
---
 deemix/app/downloadjob.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deemix/app/downloadjob.py b/deemix/app/downloadjob.py
index eb3bed2..4d948c0 100644
--- a/deemix/app/downloadjob.py
+++ b/deemix/app/downloadjob.py
@@ -616,7 +616,7 @@ class DownloadJob:
                     if int(track.filesizes[f"FILESIZE_{format}"]) != 0:
                         return format_num
                     elif not track.filesizes[f"FILESIZE_{format}_TESTED"]:
-                        request = get(self.dz.get_track_stream_url(track.id, track.MD5, track.mediaVersion, format_num), stream=True)
+                        request = requests.head(self.dz.get_track_stream_url(track.id, track.MD5, track.mediaVersion, format_num), headers={'User-Agent': USER_AGENT_HEADER}, timeout=30)
                         try:
                             request.raise_for_status()
                             return format_num