forked from Github/guess-filename.py
added ORF Mediathek pattern when original filename is missing
This commit is contained in:
parent
9650e813c3
commit
fabfc6d29a
2 changed files with 73 additions and 44 deletions
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
PROG_VERSION = u"Time-stamp: <2018-06-16 11:36:09 vk>"
|
||||
PROG_VERSION = u"Time-stamp: <2018-11-01 11:24:17 vk>"
|
||||
|
||||
|
||||
# TODO:
|
||||
|
|
@ -183,7 +183,7 @@ class GuessFilename(object):
|
|||
# of file name length restrictions, this RegEx is a fall-back in order to
|
||||
# recognize the situation.
|
||||
MEDIATHEKVIEW_SHORT_REGEX_STRING = DATESTAMP_REGEX + 'T?' + TIMESTAMP_REGEX + \
|
||||
' (.+) - (.+) - (.+) -ORIGINAL- ' # e.g., "20180510T090000 ORF - ZIB - Signation -ORIGINAL- "
|
||||
' (.+) - (.+) - (.+) -ORIGINAL(hd|low)?- ' # e.g., "20180510T090000 ORF - ZIB - Signation -ORIGINAL- "
|
||||
MEDIATHEKVIEW_SHORT_REGEX = re.compile(MEDIATHEKVIEW_SHORT_REGEX_STRING + '(.+).mp4')
|
||||
|
||||
# MediathekView was able to generate the full length file name including
|
||||
|
|
@ -562,9 +562,9 @@ class GuessFilename(object):
|
|||
by the file names of the ORF company offering its download file names.
|
||||
"""
|
||||
|
||||
if quality_string == 'Q4A':
|
||||
if quality_string == 'Q4A' or quality_string == 'LOW':
|
||||
return 'lowquality'
|
||||
elif quality_string == 'Q6A' or quality_string == 'Q8C':
|
||||
elif quality_string == 'Q6A' or quality_string == 'Q8C' or quality_string == 'HD':
|
||||
return 'highquality'
|
||||
else:
|
||||
return 'UNKNOWNQUALITY'
|
||||
|
|
@ -722,18 +722,18 @@ class GuessFilename(object):
|
|||
|
||||
qualityindicator = regex_match.group(len(regex_match.groups())).upper()
|
||||
qualitytag = self.translate_ORF_quality_string_to_tag(qualityindicator)
|
||||
start_hrs = regex_match.group(15)
|
||||
start_min = regex_match.group(16)
|
||||
start_sec = regex_match.group(17)
|
||||
end_hrs = regex_match.group(20)
|
||||
end_min = regex_match.group(21)
|
||||
end_sec = regex_match.group(22)
|
||||
start_hrs = regex_match.group(16)
|
||||
start_min = regex_match.group(17)
|
||||
start_sec = regex_match.group(18)
|
||||
end_hrs = regex_match.group(21)
|
||||
end_min = regex_match.group(22)
|
||||
end_sec = regex_match.group(23)
|
||||
self.warn_if_ORF_file_seems_to_small_according_to_duration_and_quality_indicator(oldfilename, qualityindicator,
|
||||
start_hrs, start_min, start_sec,
|
||||
end_hrs, end_min, end_sec)
|
||||
if regex_match.group(13):
|
||||
# the file name contained the optional chunk time-stamp(s)
|
||||
MEDIATHEKVIEW_LONG_INDEXGROUPS = [1, '-', 2, '-', 3, 'T', 15, '.', 16, '.', 17, ' ', 8, ' - ', 9, ' - ', 10, ' -- ', qualitytag, '.mp4']
|
||||
MEDIATHEKVIEW_LONG_INDEXGROUPS = [1, '-', 2, '-', 3, 'T', 16, '.', 17, '.', 18, ' ', 8, ' - ', 9, ' - ', 10, ' -- ', qualitytag, '.mp4']
|
||||
else:
|
||||
# the file name did NOT contain the optional chunk time-stamp(s), so we have to use the main time-stamp
|
||||
MEDIATHEKVIEW_LONG_INDEXGROUPS = [1, '-', 2, '-', 3, 'T', 4, '.', 5, '.', 6, ' ', 8, ' - ', 9, ' - ', 10, ' -- ', qualitytag, '.mp4']
|
||||
|
|
@ -806,6 +806,20 @@ class GuessFilename(object):
|
|||
logging.warn('I recognized a MediathekView file which has a cut-off time-stamp because ' +
|
||||
'of file name length restrictions.\nYou can fix it manually:')
|
||||
|
||||
if regex_match.group(12) == 'playlist.m3u8' and len(regex_match.group(11)) > 0:
|
||||
# Simple case: MediathekView failed to provide the "original filename" from its download source, e.g.:
|
||||
# '20181028T201400 ORF - Tatort - Tatort_ Blut -ORIGINALhd- playlist.m3u8.mp4'
|
||||
# There is NO original filename containing the starting time :-(
|
||||
# (see unit tests for details)
|
||||
|
||||
# "lowquality" or "highquality" or "UNKNOWNQUALITY"
|
||||
qualitytag = self.translate_ORF_quality_string_to_tag(regex_match.group(11).upper())
|
||||
|
||||
return self.build_string_via_indexgroups(regex_match, [1, '-', 2, '-', 3, 'T', 4, '.', 5, '.', 7, ' ', 8, ' - ', 9, ' - ', 10, ' -- ', qualitytag, '.mp4'])
|
||||
|
||||
else:
|
||||
# the starting time can be derived from the "original filename"
|
||||
|
||||
url_valid = False
|
||||
while not url_valid:
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8; mode: python; -*-
|
||||
# Time-stamp: <2018-06-15 21:06:40 vk>
|
||||
# Time-stamp: <2018-11-01 11:17:03 vk>
|
||||
|
||||
import unittest
|
||||
import logging
|
||||
|
|
@ -184,6 +184,21 @@ class TestGuessFilename(unittest.TestCase):
|
|||
self.assertEqual(self.guess_filename.derive_new_filename_from_old_filename('2018-06-14_2330_sd_06_Sommerkabarett - Lukas Resetarits: Schmäh (1 von 2)_____13979992__o__1310584704__s14316464_4__ORF3HD_23301620P_00302415P_Q8C.mp4'),
|
||||
'2018-06-14T23.30.16 Sommerkabarett - Lukas Resetarits: Schmäh (1 von 2) -- highquality.mp4')
|
||||
|
||||
# ORF TV Mediathek as of 2018-11-01: when there is no original filename with %N, I have to use the data I've got
|
||||
# see https://github.com/mediathekview/MServer/issues/436
|
||||
self.assertEqual(self.guess_filename.derive_new_filename_from_old_filename('20181028T201400 ORF - Tatort - Tatort_ Blut -ORIGINALhd- playlist.m3u8.mp4'),
|
||||
'2018-10-28T20.14.00 ORF - Tatort - Tatort_ Blut -- highquality.mp4')
|
||||
self.assertEqual(self.guess_filename.derive_new_filename_from_old_filename('20181028T201400 ORF - Tatort - Tatort_ Blut -ORIGINALlow- playlist.m3u8.mp4'),
|
||||
'2018-10-28T20.14.00 ORF - Tatort - Tatort_ Blut -- lowquality.mp4')
|
||||
self.assertEqual(self.guess_filename.derive_new_filename_from_old_filename('20181022T211100 ORF - Thema - Das Essen der Zukunft -ORIGINALhd- playlist.m3u8.mp4'),
|
||||
'2018-10-22T21.11.00 ORF - Thema - Das Essen der Zukunft -- highquality.mp4')
|
||||
self.assertEqual(self.guess_filename.derive_new_filename_from_old_filename('20181022T211100 ORF - Thema - Das Essen der Zukunft -ORIGINALlow- playlist.m3u8.mp4'),
|
||||
'2018-10-22T21.11.00 ORF - Thema - Das Essen der Zukunft -- lowquality.mp4')
|
||||
self.assertEqual(self.guess_filename.derive_new_filename_from_old_filename('20181025T210500 ORF - Am Schauplatz - Am Schauplatz_ Wenn alles zusammenbricht -ORIGINALhd- playlist.m3u8.mp4'),
|
||||
'2018-10-25T21.05.00 ORF - Am Schauplatz - Am Schauplatz_ Wenn alles zusammenbricht -- highquality.mp4')
|
||||
self.assertEqual(self.guess_filename.derive_new_filename_from_old_filename('20181025T210500 ORF - Am Schauplatz - Am Schauplatz_ Wenn alles zusammenbricht -ORIGINALlow- playlist.m3u8.mp4'),
|
||||
'2018-10-25T21.05.00 ORF - Am Schauplatz - Am Schauplatz_ Wenn alles zusammenbricht -- lowquality.mp4')
|
||||
|
||||
|
||||
# self.assertEqual(self.guess_filename.derive_new_filename_from_old_filename(''),
|
||||
# '')
|
||||
|
|
|
|||
Loading…
Reference in a new issue