forked from Github/guess-filename.py
added MEDIATHEKVIEW_RAW_REGEX_STRING
for raw ORF MediathekView downloads as a fall-back when wget/curl download has to replace malfunctioning MediathekView
This commit is contained in:
parent
085cbe156e
commit
09bcc1acb5
2 changed files with 61 additions and 4 deletions
|
|
@ -1,6 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
PROG_VERSION = u"Time-stamp: <2018-06-10 22:38:36 vk>"
|
PROG_VERSION = u"Time-stamp: <2018-06-15 21:10:01 vk>"
|
||||||
|
|
||||||
|
|
||||||
# TODO:
|
# TODO:
|
||||||
|
|
@ -182,7 +182,8 @@ class GuessFilename(object):
|
||||||
# SHORT_REGEX: if MediathekView is NOT able to generate the full length file name because
|
# SHORT_REGEX: if MediathekView is NOT able to generate the full length file name because
|
||||||
# of file name length restrictions, this RegEx is a fall-back in order to
|
# of file name length restrictions, this RegEx is a fall-back in order to
|
||||||
# recognize the situation.
|
# recognize the situation.
|
||||||
MEDIATHEKVIEW_SHORT_REGEX_STRING = DATESTAMP_REGEX + 'T?' + TIMESTAMP_REGEX + ' (.+) - (.+) - (.+) -ORIGINAL- ' # e.g., "20180510T090000 ORF - ZIB - Signation -ORIGINAL- "
|
MEDIATHEKVIEW_SHORT_REGEX_STRING = DATESTAMP_REGEX + 'T?' + TIMESTAMP_REGEX + \
|
||||||
|
' (.+) - (.+) - (.+) -ORIGINAL- ' # e.g., "20180510T090000 ORF - ZIB - Signation -ORIGINAL- "
|
||||||
MEDIATHEKVIEW_SHORT_REGEX = re.compile(MEDIATHEKVIEW_SHORT_REGEX_STRING + '(.+).mp4')
|
MEDIATHEKVIEW_SHORT_REGEX = re.compile(MEDIATHEKVIEW_SHORT_REGEX_STRING + '(.+).mp4')
|
||||||
|
|
||||||
# MediathekView was able to generate the full length file name including
|
# MediathekView was able to generate the full length file name including
|
||||||
|
|
@ -193,6 +194,21 @@ class GuessFilename(object):
|
||||||
# example: 20180608T193000 ORF - Österreich Heute HD 10min - Das Magazin - Österreich Heute - Das Magazin -ORIGINAL- 13979231_0007_Q8C.mp4
|
# example: 20180608T193000 ORF - Österreich Heute HD 10min - Das Magazin - Österreich Heute - Das Magazin -ORIGINAL- 13979231_0007_Q8C.mp4
|
||||||
MEDIATHEKVIEW_LONG_WITHOUT_DETAILED_TIMESTAMPS_REGEX = re.compile(MEDIATHEKVIEW_SHORT_REGEX_STRING + '.+_(Q4A|Q6A|Q8C).mp4')
|
MEDIATHEKVIEW_LONG_WITHOUT_DETAILED_TIMESTAMPS_REGEX = re.compile(MEDIATHEKVIEW_SHORT_REGEX_STRING + '.+_(Q4A|Q6A|Q8C).mp4')
|
||||||
|
|
||||||
|
# Original ORF TV Mediathek download file names as a fall-back for
|
||||||
|
# raw download using wget or curl: context menu > "Film-URL
|
||||||
|
# kopieren"
|
||||||
|
#
|
||||||
|
# examples:
|
||||||
|
# 2018-06-14_2105_sd_02_Am-Schauplatz_-_Alles für die Katz-_____13979879__o__1907287074__s14316407_7__WEB03HD_21050604P_21533212P_Q8C.mp4
|
||||||
|
# 2018-06-14_2155_sd_06_Kottan-ermittelt - Wien Mitte_____13979903__o__1460660672__s14316392_2__ORF3HD_21570716P_23260915P_Q8C.mp4
|
||||||
|
# 2018-06-14_2330_sd_06_Sommerkabarett - Lukas Resetarits: Schmäh (1 von 2)_____13979992__o__1310584704__s14316464_4__ORF3HD_23301620P_00302415P_Q8C.mp4
|
||||||
|
MEDIATHEKVIEW_RAW_DATETIME = DATESTAMP_REGEX + '_' + TIMESTAMP_REGEX # e.g., "2018-06-14_2105"
|
||||||
|
MEDIATHEKVIEW_RAW_TITLE = '_[a-z]{2}_\d{2}_(.+)' # e.g., "_sd_02_Am-Schauplatz_-_Alles für die Katz"
|
||||||
|
MEDIATHEKVIEW_RAW_NUMBERS = '_+\d+__o__.+_' # e.g., "_____13979879__o__1907287074__s14316407_7__WEB03HD_"
|
||||||
|
MEDIATHEKVIEW_RAW_ENDING = TIMESTAMP_REGEX + '\d\dP_' + TIMESTAMP_REGEX + '\d\dP_(Q4A|Q6A|Q8C).mp4' # e.g., "21050604P_21533212P_Q8C.mp4"
|
||||||
|
MEDIATHEKVIEW_RAW_REGEX_STRING = MEDIATHEKVIEW_RAW_DATETIME + MEDIATHEKVIEW_RAW_TITLE + \
|
||||||
|
MEDIATHEKVIEW_RAW_NUMBERS + MEDIATHEKVIEW_RAW_ENDING
|
||||||
|
|
||||||
# URL has format like: http://apasfpd.apa.at/cms-worldwide/online/7db1010b02753288e65ff61d5e1dff58/1528531468/2018-06-08_2140_tl_01_Was-gibt-es-Neu_Promifrage-gest__13979244__o__1391278651__s14313058_8__BCK1HD_22050122P_22091314P_Q4A.mp4
|
# URL has format like: http://apasfpd.apa.at/cms-worldwide/online/7db1010b02753288e65ff61d5e1dff58/1528531468/2018-06-08_2140_tl_01_Was-gibt-es-Neu_Promifrage-gest__13979244__o__1391278651__s14313058_8__BCK1HD_22050122P_22091314P_Q4A.mp4
|
||||||
# but with varying quality indicator: Q4A (low), Q6A (high), Q8C (HD)
|
# but with varying quality indicator: Q4A (low), Q6A (high), Q8C (HD)
|
||||||
# which gets parsed like:
|
# which gets parsed like:
|
||||||
|
|
@ -580,7 +596,12 @@ class GuessFilename(object):
|
||||||
return 14050000 # manually reduced size from the value of an actual downloaded file
|
return 14050000 # manually reduced size from the value of an actual downloaded file
|
||||||
elif filename == '20180610T000000 ORF - Kleinkunst - Kleinkunst_ Cordoba - Das Rückspiel (2_2) -ORIGINAL- 2018-06-10_0000_sd_06_Kleinkunst--Cor_____13979381__o__1483927235__s14313621_1__ORF3HD_23592020P_00593103P_Q8C.mp4':
|
elif filename == '20180610T000000 ORF - Kleinkunst - Kleinkunst_ Cordoba - Das Rückspiel (2_2) -ORIGINAL- 2018-06-10_0000_sd_06_Kleinkunst--Cor_____13979381__o__1483927235__s14313621_1__ORF3HD_23592020P_00593103P_Q8C.mp4':
|
||||||
return 1506829698 # from actual file
|
return 1506829698 # from actual file
|
||||||
|
elif filename == '2018-06-14_2105_sd_02_Am-Schauplatz_-_Alles für die Katz-_____13979879__o__1907287074__s14316407_7__WEB03HD_21050604P_21533212P_Q8C.mp4':
|
||||||
|
return 1214980782 # from actual file
|
||||||
|
elif filename == '2018-06-14_2155_sd_06_Kottan-ermittelt - Wien Mitte_____13979903__o__1460660672__s14316392_2__ORF3HD_21570716P_23260915P_Q8C.mp4':
|
||||||
|
return 2231522252 # from actual file
|
||||||
|
elif filename == '2018-06-14_2330_sd_06_Sommerkabarett - Lukas Resetarits: Schmäh (1 von 2)_____13979992__o__1310584704__s14316464_4__ORF3HD_23301620P_00302415P_Q8C.mp4':
|
||||||
|
return 1506983474 # from actual file
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return os.stat(filename).st_size
|
return os.stat(filename).st_size
|
||||||
|
|
@ -718,6 +739,33 @@ class GuessFilename(object):
|
||||||
MEDIATHEKVIEW_LONG_INDEXGROUPS = [1, '-', 2, '-', 3, 'T', 4, '.', 5, '.', 6, ' ', 8, ' - ', 9, ' - ', 10, ' -- ', qualitytag, '.mp4']
|
MEDIATHEKVIEW_LONG_INDEXGROUPS = [1, '-', 2, '-', 3, 'T', 4, '.', 5, '.', 6, ' ', 8, ' - ', 9, ' - ', 10, ' -- ', qualitytag, '.mp4']
|
||||||
return self.build_string_via_indexgroups(regex_match, MEDIATHEKVIEW_LONG_INDEXGROUPS).replace('_', ' ')
|
return self.build_string_via_indexgroups(regex_match, MEDIATHEKVIEW_LONG_INDEXGROUPS).replace('_', ' ')
|
||||||
|
|
||||||
|
# MEDIATHEKVIEW_RAW_REGEX_STRING:
|
||||||
|
# MediathekView ORF raw file name
|
||||||
|
#
|
||||||
|
regex_match = re.match(self.MEDIATHEKVIEW_RAW_REGEX_STRING, oldfilename)
|
||||||
|
if regex_match:
|
||||||
|
|
||||||
|
logging.debug('Filename looks like ORF raw file name: MEDIATHEKVIEW_RAW_REGEX_STRING')
|
||||||
|
|
||||||
|
qualityindicator = regex_match.group(len(regex_match.groups())).upper()
|
||||||
|
qualitytag = self.translate_ORF_quality_string_to_tag(qualityindicator)
|
||||||
|
start_hrs = regex_match.group(9)
|
||||||
|
start_min = regex_match.group(10)
|
||||||
|
start_sec = regex_match.group(11)
|
||||||
|
end_hrs = regex_match.group(13)
|
||||||
|
end_min = regex_match.group(14)
|
||||||
|
end_sec = regex_match.group(15)
|
||||||
|
self.warn_if_ORF_file_seems_to_small_according_to_duration_and_quality_indicator(oldfilename, qualityindicator,
|
||||||
|
start_hrs, start_min, start_sec,
|
||||||
|
end_hrs, end_min, end_sec)
|
||||||
|
# transform ...
|
||||||
|
# 'Am-Schauplatz_-_Alles f\xc3\xbcr die Katz-____'
|
||||||
|
# ... into ...
|
||||||
|
# 'Am Schauplatz - Alles f\xc3\xbcr die Katz'
|
||||||
|
title = regex_match.group(8).replace('-',' ').replace('_ _',' - ').replace(' ',' - ').replace('_','').strip()
|
||||||
|
MEDIATHEKVIEW_RAW_INDEXGROUPS = [1, '-', 2, '-', 3, 'T', start_hrs, '.', start_min, '.', start_sec, ' ', title, ' -- ', qualitytag, '.mp4']
|
||||||
|
return self.build_string_via_indexgroups(regex_match, MEDIATHEKVIEW_RAW_INDEXGROUPS)
|
||||||
|
|
||||||
# MEDIATHEKVIEW_LONG_WITHOUT_DETAILED_TIMESTAMPS_REGEX:
|
# MEDIATHEKVIEW_LONG_WITHOUT_DETAILED_TIMESTAMPS_REGEX:
|
||||||
# MediathekView was able to generate the full length file name including
|
# MediathekView was able to generate the full length file name including
|
||||||
# the full length original file name which DOES NOT contain the detailed begin- and
|
# the full length original file name which DOES NOT contain the detailed begin- and
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8; mode: python; -*-
|
# -*- coding: utf-8; mode: python; -*-
|
||||||
# Time-stamp: <2018-06-10 22:42:27 vk>
|
# Time-stamp: <2018-06-15 21:06:40 vk>
|
||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
import logging
|
import logging
|
||||||
|
|
@ -176,6 +176,15 @@ class TestGuessFilename(unittest.TestCase):
|
||||||
self.assertEqual(self.guess_filename.derive_new_filename_from_old_filename('20180610T000000 ORF - Kleinkunst - Kleinkunst_ Cordoba - Das Rückspiel (2_2) -ORIGINAL- 2018-06-10_0000_sd_06_Kleinkunst--Cor_____13979381__o__1483927235__s14313621_1__ORF3HD_23592020P_00593103P_Q8C.mp4'),
|
self.assertEqual(self.guess_filename.derive_new_filename_from_old_filename('20180610T000000 ORF - Kleinkunst - Kleinkunst_ Cordoba - Das Rückspiel (2_2) -ORIGINAL- 2018-06-10_0000_sd_06_Kleinkunst--Cor_____13979381__o__1483927235__s14313621_1__ORF3HD_23592020P_00593103P_Q8C.mp4'),
|
||||||
'2018-06-10T23.59.20 ORF - Kleinkunst - Kleinkunst Cordoba - Das Rückspiel (2 2) -- highquality.mp4')
|
'2018-06-10T23.59.20 ORF - Kleinkunst - Kleinkunst Cordoba - Das Rückspiel (2 2) -- highquality.mp4')
|
||||||
|
|
||||||
|
# Original ORF TV Mediathek download file names (as a fall-back for raw download using wget or curl):
|
||||||
|
self.assertEqual(self.guess_filename.derive_new_filename_from_old_filename('2018-06-14_2105_sd_02_Am-Schauplatz_-_Alles für die Katz-_____13979879__o__1907287074__s14316407_7__WEB03HD_21050604P_21533212P_Q8C.mp4'),
|
||||||
|
'2018-06-14T21.05.06 Am Schauplatz - Alles für die Katz -- highquality.mp4')
|
||||||
|
self.assertEqual(self.guess_filename.derive_new_filename_from_old_filename('2018-06-14_2155_sd_06_Kottan-ermittelt - Wien Mitte_____13979903__o__1460660672__s14316392_2__ORF3HD_21570716P_23260915P_Q8C.mp4'),
|
||||||
|
'2018-06-14T21.57.07 Kottan ermittelt - Wien Mitte -- highquality.mp4')
|
||||||
|
self.assertEqual(self.guess_filename.derive_new_filename_from_old_filename('2018-06-14_2330_sd_06_Sommerkabarett - Lukas Resetarits: Schmäh (1 von 2)_____13979992__o__1310584704__s14316464_4__ORF3HD_23301620P_00302415P_Q8C.mp4'),
|
||||||
|
'2018-06-14T23.30.16 Sommerkabarett - Lukas Resetarits: Schmäh (1 von 2) -- highquality.mp4')
|
||||||
|
|
||||||
|
|
||||||
# self.assertEqual(self.guess_filename.derive_new_filename_from_old_filename(''),
|
# self.assertEqual(self.guess_filename.derive_new_filename_from_old_filename(''),
|
||||||
# '')
|
# '')
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue