added new support for MediathekView/ORF

This commit is contained in:
Karl Voit 2018-05-10 17:07:37 +02:00
parent f738a5b304
commit a15533ebc1
2 changed files with 85 additions and 16 deletions

View file

@ -34,7 +34,7 @@ customer numbers or phone numbers, amounts to pay, and so on.
** Usage
#+BEGIN_SRC sh :results output :wrap src
./guessfilename/__init__.py --help
guessfilename --help
#+END_SRC
#+BEGIN_src
@ -67,6 +67,44 @@ Options:
--version display version and exit
#+END_src
** MediathekView
:PROPERTIES:
:CREATED: [2018-05-10 Thu 17:03]
:END:
When downloading TV shows using [[https://github.com/mediathekview/MediathekView][MediathekView]], you should use the following download pattern:
: %DT%d %s %t - %T -ORIGINAL- %N.mp4
When you apply =guess-filename= to the resulting files, you will get something like this:
#+BEGIN_EXAMPLE
20180509T235000 ORF ZIB 24 - Auswirkungen nach US-Aus für Atomdeal -ORIGINAL- 2018-05-09_2350_tl_01_ZIB-24_Auswirkungen-na__13976363__o__1735069995__s14297628_8__BCK1HD_23514710P_23540405P_Q4A.mp4 ...
→ 2018-05-09T23.51.47 ORF - ZIB 24 - Auswirkungen nach US-Aus für Atomdeal -- lowquality.mp4
20180509T235000 ORF ZIB 24 - Hirntoter Bub plötzlich aufgewacht -ORIGINAL- 2018-05-09_2350_tl_01_ZIB-24_Hirntoter-Bub-p__13976363__o__5119815115__s14297631_1__BCK1HD_00045915P_00072303P_Q4A.mp4 ...
→ 2018-05-09T00.04.59 ORF - ZIB 24 - Hirntoter Bub plötzlich aufgewacht -- lowquality.mp4
20180509T235000 ORF ZIB 24 - Meldungen -ORIGINAL- 2018-05-09_2350_tl_01_ZIB-24_Meldungen__13976363__o__1117657593__s14297632_2__BCK1HD_00072303P_00085816P_Q4A.mp4 ...
→ 2018-05-09T00.07.23 ORF - ZIB 24 - Meldungen -- lowquality.mp4
20180509T235000 ORF ZIB 24 - Neuerung bei Filmfestspielen in Cannes -ORIGINAL- 2018-05-09_2350_tl_01_ZIB-24_Neuerung-bei-Fi__13976363__o__1941003027__s14297634_4__BCK1HD_00085816P_00111715P_Q4A.mp4 ...
→ 2018-05-09T00.08.58 ORF - ZIB 24 - Neuerung bei Filmfestspielen in Cannes -- lowquality.mp4
20180509T235000 ORF ZIB 24 - Trumps CIA-Kandidatin umstritten -ORIGINAL- 2018-05-09_2350_tl_01_ZIB-24_Trumps-Kandidat__13976363__o__1488806017__s14297630_0__BCK1HD_00020922P_00045915P_Q4A.mp4 ...
→ 2018-05-09T00.02.09 ORF - ZIB 24 - Trumps CIA-Kandidatin umstritten -- lowquality.mp4
20180509T235000 ORF ZIB 24 - Wetter -ORIGINAL- 2018-05-09_2350_tl_01_ZIB-24_Wetter__13976363__o__2966973785__s14297635_5__BCK1HD_00111715P_00120000P_Q4A.mp4 ...
→ 2018-05-09T00.11.17 ORF - ZIB 24 - Wetter -- lowquality.mp4
#+END_EXAMPLE
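As you can see, the start time of each chunk is extracted from the
original file name so that the renamed files sort in their correct
temporal order.
Please note that this does not work for a show whose chunks cross
midnight, since the date is always taken from the start of the show
while the time is taken from the start of the individual chunk. In the
example above, the chunks broadcast after midnight still carry the date
2018-05-09 and therefore sort before the first chunk (23.51.47).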
* Related tools and workflows
This tool is part of a tool-set which I use to manage my digital files

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
PROG_VERSION = u"Time-stamp: <2018-05-05 16:36:09 vk>"
PROG_VERSION = u"Time-stamp: <2018-05-10 17:01:51 vk>"
# TODO:
@ -149,14 +149,23 @@ class GuessFilename(object):
VID_REGEX = re.compile('VID_' + DATESTAMP_REGEX + '_' + TIMESTAMP_REGEX + '(.+)?.mp4', re.UNICODE)
VID_INDEXGROUPS = [1, '-', 2, '-', 3, 'T', 4, '.', 5, ['.', 7], 8, '.mp4']
# MediathekView: Settings > modify Set > Targetfilename: "%DT%d h%i %s %t - %T - %N.mp4"
# OLD # # MediathekView: Settings > modify Set > Targetfilename: "%DT%d h%i %s %t - %T - %N.mp4" (limited to 120 characters)
# OLD # # results in files like:
# OLD # # 20161227T201500 h115421 ORF Das Sacher. In bester Gesellschaft 1.mp4
# OLD # # 20161227T193000 l119684 ORF ZIB 1 - Auswirkungen der _Panama-Papers_ - 2016-12-27_1930_tl_02_ZIB-1_Auswirkungen-de__.mp4
# OLD # MEDIATHEKVIEW_SIMPLE_REGEX = re.compile(DATESTAMP_REGEX + 'T?' + TIMESTAMP_REGEX +
# OLD # '(.+?)( - [12]\d{3}' + TIMESTAMP_DELIMITERS + '[01]\d' + TIMESTAMP_DELIMITERS +
# OLD # '[0123]\d_.+)?.mp4', re.UNICODE)
# OLD # MEDIATHEKVIEW_SIMPLE_INDEXGROUPS = [1, '-', 2, '-', 3, 'T', 4, '.', 5, ['.', 7], 8, '.mp4']
# MediathekView: Settings > modify Set > Targetfilename: "%DT%d %s %t - %T -ORIGINAL- %N.mp4" (without any limit on the maximum number of characters)
# results in files like:
# 20161227T201500 h115421 ORF Das Sacher. In bester Gesellschaft 1.mp4
# 20161227T193000 l119684 ORF ZIB 1 - Auswirkungen der _Panama-Papers_ - 2016-12-27_1930_tl_02_ZIB-1_Auswirkungen-de__.mp4
MEDIATHEKVIEW_REGEX = re.compile(DATESTAMP_REGEX + 'T?' + TIMESTAMP_REGEX +
'(.+?)( - [12]\d{3}' + TIMESTAMP_DELIMITERS + '[01]\d' + TIMESTAMP_DELIMITERS +
'[0123]\d_.+)?.mp4', re.UNICODE)
MEDIATHEKVIEW_INDEXGROUPS = [1, '-', 2, '-', 3, 'T', 4, '.', 5, ['.', 7], 8, '.mp4']
# 20180510T090000 ORF ZIB 9_00 - Signation -ORIGINAL- 2018-05-10_0900_tl_02_ZIB-9-00_Signation__13976423__o__1368225677__s14297692_2__WEB03HD_09000305P_09001400P_Q4A.mp4
# 20180510T090000 ORF ZIB 9_00 - Weitere Signale der Entspannung -ORIGINAL- 2018-05-10_0900_tl_02_ZIB-9-00_Weitere-Signale__13976423__o__5968792755__s14297694_4__WEB03HD_09011813P_09020710P_Q4A.mp4
MEDIATHEKVIEW_LONG_REGEX = re.compile(DATESTAMP_REGEX + 'T?' + TIMESTAMP_REGEX +
' ORF (.+) - (.+) -ORIGINAL- ' +
'20.+_(' + TIMESTAMP_REGEX + ').+P_' +
'(' + TIMESTAMP_REGEX + ').+P_(Q4A|Q8C).mp4', re.UNICODE)
# C112345678901EUR20150930001.pdf -> 2015-09-30 Bank Austria Kontoauszug 2017-001 12345678901.pdf
BANKAUSTRIA_BANK_STATEMENT_REGEX = re.compile('^C1(\d{11})EUR(\d{4})(\d{2})(\d{2})(\d{3}).pdf$', re.UNICODE)
@ -519,17 +528,39 @@ class GuessFilename(object):
if regex_match:
return self.build_string_via_indexgroups(regex_match, self.BANKAUSTRIA_BANK_TRANSACTIONS_INDEXGROUPS)
# MediathekView: Settings > modify Set > Targetfilename: "%DT%d h%i %s %t - %T - %N.mp4"
# MediathekView: Settings > modify Set > Targetfilename: "%DT%d %s %t - %T -ORIGINAL- %N.mp4" (without any limit on the maximum number of characters)
# results in files like:
# 20161227T201500 h115421 ORF Das Sacher. In bester Gesellschaft 1.mp4
# -> 2016-12-27T20.15.00 h115421 ORF Das Sacher. In bester Gesellschaft 1.mp4
# 20161227T193000 l119684 ORF ZIB 1 - Auswirkungen der _Panama-Papers_ - 2016-12-27_1930_tl_02_ZIB-1_Auswirkungen-de__.mp4
# -> 2016-12-27T19.30.00 l119684 ORF ZIB 1 - Auswirkungen der _Panama-Papers_.mp4
regex_match = re.match(self.MEDIATHEKVIEW_REGEX, oldfilename)
# 20180510T090000 ORF ZIB - Signation -ORIGINAL- 2018-05-10_0900_tl_02_ZIB-9-00_Signation__13976423__o__1368225677__s14297692_2__WEB03HD_09000305P_09001400P_Q4A.mp4
# regex_match.groups() == ('2018', '05', '10', '09', '00', '00', '00', 'ZIB', 'Signation', '090003', '09', '00', '03', '03', '090014', '09', '00', '14', '14', 'Q4A')
# -> 2018-05-10T09.00.03 ORF - ZIB - Signation -- lowquality.mp4
# 20180510T090000 ORF ZIB - Weitere Signale der Entspannung -ORIGINAL- 2018-05-10_0900_tl_02_ZIB-9-00_Weitere-Signale__13976423__o__5968792755__s14297694_4__WEB03HD_09011813P_09020710P_Q4A.mp4
# -> 2018-05-10T09.01.18 ORF - ZIB - Weitere Signale der Entspannung -- lowquality.mp4
regex_match = re.match(self.MEDIATHEKVIEW_LONG_REGEX, oldfilename)
if regex_match.group(9) == 'Signation':
import pdb; pdb.set_trace()
if regex_match:
if 'Tatort' in oldfilename and os.stat(oldfilename).st_size < 2000000000 and not options.quiet:
print('' + colorama.Style.BRIGHT + colorama.Fore.RED + 'WARNING: Tatort file seems to be too small (download aborted?): ' + oldfilename + colorama.Style.RESET_ALL)
return self.build_string_via_indexgroups(regex_match, self.MEDIATHEKVIEW_INDEXGROUPS).replace('_', ' ')
if regex_match.group(20).upper() == 'Q4A':
qualitytag = 'lowquality'
elif regex_match.group(20).upper() == 'Q8C':
qualitytag = 'highquality'
else:
qualitytag = 'UNKNOWNQUALITY'
MEDIATHEKVIEW_LONG_INDEXGROUPS = [1, '-', 2, '-', 3, 'T', 11, '.', 12, '.', 13, ' ORF - ', 8, ' - ', 9, ' -- ', qualitytag, '.mp4']
return self.build_string_via_indexgroups(regex_match, MEDIATHEKVIEW_LONG_INDEXGROUPS).replace('_', ' ')
# OLD # # MediathekView: Settings > modify Set > Targetfilename: "%DT%d h%i %s %t - %T - %N.mp4"
# OLD # # results in files like:
# OLD # # 20161227T201500 h115421 ORF Das Sacher. In bester Gesellschaft 1.mp4
# OLD # # -> 2016-12-27T20.15.00 h115421 ORF Das Sacher. In bester Gesellschaft 1.mp4
# OLD # # 20161227T193000 l119684 ORF ZIB 1 - Auswirkungen der _Panama-Papers_ - 2016-12-27_1930_tl_02_ZIB-1_Auswirkungen-de__.mp4
# OLD # # -> 2016-12-27T19.30.00 l119684 ORF ZIB 1 - Auswirkungen der _Panama-Papers_.mp4
# OLD # regex_match = re.match(self.MEDIATHEKVIEW_SIMPLE_REGEX, oldfilename)
# OLD # if regex_match:
# OLD # if 'Tatort' in oldfilename and os.stat(oldfilename).st_size < 2000000000 and not options.quiet:
# OLD # print(' → ' + colorama.Style.BRIGHT + colorama.Fore.RED + 'WARNING: Tatort file seems to be too small (download aborted?): ' + oldfilename + colorama.Style.RESET_ALL)
# OLD # return self.build_string_via_indexgroups(regex_match, self.MEDIATHEKVIEW_SIMPLE_INDEXGROUPS).replace('_', ' ')
# Android OSMTracker GPS track files:
# 2015-05-27T09;00;15_foo_bar.gpx -> 2015-05-27T09.00.15 foo bar.gpx