re-ordered function definitions

2026-02-16 05:14:16 +00:00 · 2019-10-19 12:53:09 +02:00 · 2019-10-19 12:53:09 +02:00 · 0dbdc168ca
commit 0dbdc168ca
parent 207728809d
1 changed file with 410 additions and 414 deletions
--- a/guessfilename/__init__.py
+++ b/guessfilename/__init__.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
-PROG_VERSION = u"Time-stamp: <2019-10-19 12:13:52 vk>"
+PROG_VERSION = u"Time-stamp: <2019-10-19 12:52:48 vk>"


 # TODO:
@@ -275,419 +275,6 @@ class GuessFilename(object):
        self.logger = logger
        self.config = config

-    def adding_tags(self, tagarray, newtags):
-        """
-        Returns unique array of tags containing the newtag.
-
-        @param tagarray: a array of unicode strings containing tags
-        @param newtag: a array of unicode strings containing tags
-        @param return: a array of unicode strings containing tags
-        """
-
-        assert tagarray.__class__ == list
-        assert newtags.__class__ == list
-
-        resulting_tags = tagarray
-
-        for tag in newtags:
-            if tag not in tagarray:
-                resulting_tags.append(tag)
-
-        return resulting_tags
-
-    def split_filename_entities(self, filename):
-        """
-        Takes a filename of format ( (date(time)?)?(--date(time)?)? )? filename (tags)? (extension)?
-        and returns a set of (date/time/duration, filename, array of tags, extension).
-        """
-
-        # FIXXME: return directory as well!
-
-        assert(type(filename) == str or type(filename) == str)
-        assert(len(filename) > 0)
-
-        components = re.match(self.ISO_NAME_TAGS_EXTENSION_REGEX, filename)
-
-        assert(components)
-
-        if components.group(self.TAGS_INDEX):
-            tags = components.group(self.TAGS_INDEX).split(' ')
-        else:
-            tags = []
-        return components.group(self.DAYTIME_DURATION_INDEX), \
-            components.group(self.NAME_INDEX), \
-            tags, \
-            components.group(self.EXTENSION_INDEX)
-
-    def contains_one_of(self, string, entries):
-        """
-        Returns true, if the string contains one of the strings within entries array
-        """
-
-        assert(type(string) == str or type(string) == str)
-        assert(type(entries) == list)
-        assert(len(string) > 0)
-        assert(len(entries) > 0)
-
-        for entry in entries:
-            if entry in string:
-                return True
-
-        return False
-
-    def contains_all_of(self, string, entries):
-        """
-        Returns true, if the string contains all of the strings within entries array
-        """
-
-        assert(type(string) == str or type(string) == str)
-        assert(type(entries) == list)
-        assert(len(string) > 0)
-        assert(len(entries) > 0)
-
-        for entry in entries:
-            if entry not in string:
-                return False
-
-        return True
-
-    def fuzzy_contains_one_of(self, string, entries):
-        """
-        Returns true, if the string contains a similar one of the strings within entries array
-        """
-
-        assert(type(string) == str or type(string) == str)
-        assert(type(entries) == list)
-        assert(len(string) > 0)
-        assert(len(entries) > 0)
-
-        for entry in entries:
-            similarity = fuzz.partial_ratio(string, entry)
-            if similarity > 64:
-                # logging.debug(u"MATCH   fuzzy_contains_one_of(%s, %s) == %i" % (string, str(entry), similarity))
-                return True
-            else:
-                # logging.debug(u"¬ MATCH fuzzy_contains_one_of(%s, %s) == %i" % (string, str(entry), similarity))
-                pass
-
-        return False
-
-    def fuzzy_contains_all_of(self, string, entries):
-        """
-        Returns true, if the string contains all similar ones of the strings within the entries array
-        """
-
-        assert(type(string) == str or type(string) == str)
-        assert(type(entries) == list)
-        assert(len(string) > 0)
-        assert(len(entries) > 0)
-
-        for entry in entries:
-            assert(type(entry) == str or type(entry) == str)
-            # logging.debug(u"fuzzy_contains_all_of(%s..., %s...) ... " % (string[:30], str(entry[:30])))
-            if entry not in string:
-                # if entry is found in string (exactly), try with fuzzy search:
-
-                similarity = fuzz.partial_ratio(string, entry)
-                if similarity > 64:
-                    # logging.debug(u"MATCH   fuzzy_contains_all_of(%s..., %s) == %i" % (string[:30], str(entry), similarity))
-                    pass
-                else:
-                    # logging.debug(u"¬ MATCH fuzzy_contains_all_of(%s..., %s) == %i" % (string[:30], str(entry), similarity))
-                    return False
-
-        return True
-
-    def has_euro_charge(self, string):
-        """
-        Returns true, if the single-line string contains a number with a €-currency
-        """
-
-        assert(type(string) == str or type(string) == str)
-        assert(len(string) > 0)
-
-        components = re.match(self.EURO_CHARGE_REGEX, string)
-
-        if components:
-            return True
-        else:
-            return False
-
-    def get_euro_charge(self, string):
-        """
-        Returns the first included €-currency within single-line "string" or False
-        """
-
-        assert(type(string) == str or type(string) == str)
-        assert(len(string) > 0)
-
-        components = re.match(self.EURO_CHARGE_REGEX, string)
-
-        if components:
-            return components.group(self.EURO_CHARGE_INDEX)
-        else:
-            return False
-
-    def get_euro_charge_from_context_or_basename(self, string, before, after, basename):
-        """
-        Returns the included €-currency which is between before and after
-        strings or within the basename or return 'FIXXME'
-        """
-
-        charge = self.get_euro_charge_from_context(string, before, after)
-        if not charge:
-            charge = self.get_euro_charge(basename)
-            if not charge:
-                return 'FIXXME'
-
-        return charge
-
-    def get_euro_charge_from_context(self, string, before, after):
-        """
-        Returns the included €-currency which is between before and after strings or False
-        """
-
-        assert(type(string) == str or type(string) == str)
-        assert(type(before) == str or type(before) == str)
-        assert(type(after) == str or type(after) == str)
-        assert(len(string) > 0)
-
-        context_range = '5'  # range of characters where before/after is valid
-
-        # for testing: re.search(".*" + before + r"\D{0,6}(\d{1,6}[,.]\d{2})\D{0,6}" + after + ".*", string).groups()
-        components = re.search(".*" + before + r"\D{0," + context_range + "}((\d{1,6})[,.](\d{2}))\D{0," + context_range + "}" + after + ".*", string)
-
-        if components:
-            floatstring = components.group(2) + ',' + components.group(3)
-            # logging.debug("get_euro_charge_from_context extracted float: [%s]" % floatstring)
-            return floatstring
-        else:
-            logging.warning("Sorry, I was not able to extract a charge for this file, please fix manually")
-            logging.debug("get_euro_charge_from_context was not able to extract a float: between [%s] and [%s] within [%s]" % (before, after, string[:30] + "..."))
-            return False
-
-    def rename_file(self, dirname, oldbasename, newbasename, dryrun=False, quiet=False):
-        """
-        Renames a file from oldbasename to newbasename in dirname.
-
-        Only simulates result if dryrun is True.
-
-        @param dirname: string containing the directory of the file
-        @param oldbasename: string containing the old file name (basename)
-        @param newbasename: string containing the new file name (basename)
-        @param dryrun: boolean which defines if files should be changed (False) or not (True)
-        """
-
-        if oldbasename == newbasename:
-            logging.info("Old filename is same as new filename: skipping file")
-            return False
-
-        oldfile = os.path.join(dirname, oldbasename)
-        newfile = os.path.join(dirname, newbasename)
-
-        if not os.path.isfile(oldfile):
-            logging.error("file to rename does not exist: [%s]" % oldfile)
-            return False
-
-        if os.path.isfile(newfile):
-            logging.error("file can't be renamed since new file name already exists: [%s]" % newfile)
-            return False
-
-        if not quiet:
-            print('       →  ' + colorama.Style.BRIGHT + colorama.Fore.GREEN + newbasename + colorama.Style.RESET_ALL)
-        logging.debug(" renaming \"%s\"" % oldfile)
-        logging.debug("      ⤷   \"%s\"" % newfile)
-        if not dryrun:
-            os.rename(oldfile, newfile)
-        return True
-
-    def build_string_via_indexgroups(self, regex_match, indexgroups):
-        """This function takes a regex_match object and concatenates its
-        groups. It does this by traversing the list of indexgroups. If
-        the list item is an integer, the corresponding
-        regex_match.group() is appended to the result string. If the
-        list item is a string, the string is appended to the result
-        string.
-
-        When a list item is a list, its elements are appended as well as
-        long as all list items exist.
-
-        match-groups that are in the indexgroups but are None are ignored.
-
-        @param regex_match: a regex match object from re.match(REGEX, STRING)
-        @param indexgroups: list of strings and integers like [1, '-', 2, '-', 3, 'T', 4, '.', 5, ' foo .png']
-        @param return: string containing the concatenated string
-
-        """
-
-        if not regex_match:
-            logging.error('no re.match object found; please check before calling build_string_via_indexgroups()')
-            return "ERROR"
-
-        def append_element(string, indexgroups):
-            result = string
-            for element in indexgroups:
-                if type(element) == str:
-                    result += element
-                    # print 'DEBUG: result after element [' + str(element)  + '] =  [' + str(result) + ']'
-                elif type(element) == int:
-                    potential_element = regex_match.group(element)
-                    # ignore None matches
-                    if potential_element:
-                        result += regex_match.group(element)
-                        # print 'DEBUG: result after element [' + str(element)  + '] =  [' + str(result) + ']'
-                    else:
-                        # print 'DEBUG: match-group element ' + str(element) + ' is None'
-                        pass
-                elif type(element) == list:
-                    # recursive: if a list element is a list, process if all elements exists:
-                    # print 'DEBUG: found list item = ' + str(element)
-                    # print 'DEBUG:   result before = [' + str(result) + ']'
-                    all_found = True
-                    for listelement in element:
-                        if type(listelement) == int and (regex_match.group(listelement) is None or
-                                                         len(regex_match.group(listelement)) < 1):
-                            all_found = False
-                    if all_found:
-                        result = append_element(result, element)
-                        # print 'DEBUG:   result after =  [' + str(result) + ']'
-                    else:
-                        pass
-                        # print 'DEBUG:   result after =  [' + str(result) + ']' + \
-                        #    '   -> not changed because one or more elements of sub-list were not found'
-            return result
-
-        logging.debug('build_string_via_indexgroups: FILENAME: ' + str(regex_match.group(0)))
-        logging.debug('build_string_via_indexgroups: GROUPS: ' + str(regex_match.groups()))
-        result = append_element('', indexgroups)
-        logging.debug('build_string_via_indexgroups: RESULT:   ' + result)
-        return result
-
-
-    def NumToMonth(self, month):
-
-        months = ['Dezember', 'Jaenner', 'Februar', 'Maerz', 'April', 'Mai', 'Juni', 'Juli', 'August', 'September', 'Oktober', 'November', 'Dezember']
-        return months[month]
-
-
-    def translate_ORF_quality_string_to_tag(self, quality_string):
-        """
-        Returns a filetag which is derived from a key string. The key strings are defined
-        by the file names of the ORF company offering its download file names.
-        """
-
-        if quality_string == 'Q4A' or quality_string == 'LOW':
-            return 'lowquality'
-        elif quality_string == 'Q6A' or quality_string == 'Q8C' or quality_string == 'HD':
-            return 'highquality'
-        else:
-            return 'UNKNOWNQUALITY'
-
-
-    def get_file_size(self, filename):
-        """
-        A simple wrapper to determine file sizes.
-
-        For some hard-coded file names, a hard-coded file size is returned. This enables
-        unit-testing for file sizes that do not exist in the file system.
-        """
-
-        # these are the hard-coded sizes for unit test cases:
-        if filename in ['20180510T090000 ORF - ZIB - Signation -ORIGINAL- 2018-05-10_0900_tl_02_ZIB-9-00_Signation__13976423__o__1368225677__s14297692_2__WEB03HD_09000305P_09001400P_Q4A.mp4',
-                        '20180510T090000 ORF - ZIB - Weitere Signale der Entspannung -ORIGINAL- 2018-05-10_0900_tl_02_ZIB-9-00_Weitere-Signale__13976423__o__5968792755__s14297694_4__WEB03HD_09011813P_09020710P_Q4A.mp4',
-                        '20180520T201500 ORF - Tatort - Tatort_ Aus der Tiefe der Zeit -ORIGINAL- 2018-05-20_2015_in_02_Tatort--Aus-der_____13977411__o__1151703583__s14303062_Q8C.mp4',
-                        '20180521T193000 ORF - ZIB 1 - Parlament bereitet sich auf EU-Vorsitz vor -ORIGINAL- 2018-05-21_1930_tl_02_ZIB-1_Parlament-berei__13977453__o__277886215b__s14303762_2__WEB03HD_19350304P_19371319P_Q4A.mp4',
-                        '20180608T193000 ORF - Österreich Heute - Das Magazin - Österreich Heute - Das Magazin -ORIGINAL- 13979231_0007_Q8C.mp4',
-                        '20190902T220000 ORF - ZIB 2 - Bericht über versteckte ÖVP-Wahlkampfkosten -ORIGINALlow- 2019-09-02_2200_tl_02_ZIB-2_Bericht-ueber-v__14024705__o__71528285d6__s14552793_3__ORF2HD_22033714P_22074303P_Q4A.mp4',
-                        '20190902T220000 ORF - ZIB 2 - Hinweis _ Verabschiedung -ORIGINALlow- 2019-09-02_2200_tl_02_ZIB-2_Hinweis---Verab__14024705__o__857007705d__s14552799_9__ORF2HD_22285706P_22300818P_Q4A.mp4']:
-            # don't care about file sizes, return a high number that is abote the expected minimum in any case:
-            return 99999999
-        elif filename == '20180608T170000 ORF - ZIB 17_00 - size okay -ORIGINAL- 2018-06-08_1700_tl__13979222__o__1892278656__s14313181_1__WEB03HD_17020613P_17024324P_Q4A.mp4':
-            return 5017289  # from an actual downloaded file
-        elif filename == '20180608T170000 ORF - ZIB 17_00 - size not okay -ORIGINAL- 2018-06-08_1700_tl__13979222__o__1892278656__s14313181_1__WEB03HD_17020613P_17024324P_Q4A.mp4':
-            return 4217289  # manually reduced size from the value of an actual downloaded file
-        elif filename == '20180608T170000 ORF - ZIB 17_00 - size okay -ORIGINAL- 2018-06-08_1700_tl__13979222__o__1892278656__s14313181_1__WEB03HD_17020613P_17024324P_Q8C.mp4':
-            return 15847932  # from an actual downloaded file
-        elif filename == '20180608T170000 ORF - ZIB 17_00 - size not okay -ORIGINAL- 2018-06-08_1700_tl__13979222__o__1892278656__s14313181_1__WEB03HD_17020613P_17024324P_Q8C.mp4':
-            return 14050000  # manually reduced size from the value of an actual downloaded file
-        elif filename == '20180610T000000 ORF - Kleinkunst - Kleinkunst_ Cordoba - Das Rückspiel (2_2) -ORIGINAL- 2018-06-10_0000_sd_06_Kleinkunst--Cor_____13979381__o__1483927235__s14313621_1__ORF3HD_23592020P_00593103P_Q8C.mp4':
-            return 1506829698  # from actual file
-        elif filename == '2018-06-14_2105_sd_02_Am-Schauplatz_-_Alles für die Katz-_____13979879__o__1907287074__s14316407_7__WEB03HD_21050604P_21533212P_Q8C.mp4':
-            return 1214980782  # from actual file
-        elif filename == '2018-06-14_2155_sd_06_Kottan-ermittelt - Wien Mitte_____13979903__o__1460660672__s14316392_2__ORF3HD_21570716P_23260915P_Q8C.mp4':
-            return 2231522252  # from actual file
-        elif filename == '2018-06-14_2330_sd_06_Sommerkabarett - Lukas Resetarits: Schmäh (1 von 2)_____13979992__o__1310584704__s14316464_4__ORF3HD_23301620P_00302415P_Q8C.mp4':
-            return 1506983474  # from actual file
-
-        try:
-            return os.stat(filename).st_size
-        except OSError:
-            error_exit(10, 'get_file_size(): Could not get file size of: ' + filename)
-
-
-    def warn_if_ORF_file_seems_to_small_according_to_duration_and_quality_indicator(self, oldfilename, qualityindicator,
-                                                                                    start_hrs, start_min, start_sec,
-                                                                                    end_hrs, end_min, end_sec):
-        """
-        Launches a warning if the expected size differs from the actual file size.
-
-        Expected size is derived from the detailed time-stamp information
-        and tests with a ten minute file:
-
-        | Quality Indicator       | file size | bytes per second |
-        |-------------------------+-----------+------------------|
-        | Q8C = HD                | 240429907 |           400717 |
-        | Q6A = high quality      | 150198346 |           250331 |
-        | Q4A = low quality       |  74992178 |           124987 |
-        """
-
-        #FIXXME: 2019-08-26: disabled: correct from exception to warning #
-        #FIXXME: 2019-09-03: assigned tests also disabled because this function never raises the expected exception
-        return
-
-        TOLERANCE_FACTOR = 0.95  # To cover edge cases where a reduced file size is feasible
-
-        file_size = self.get_file_size(oldfilename)
-
-        day_of_end = 1
-        if int(end_hrs) < int(start_hrs):
-            logging.debug('end hours is less than begin hours, adding a day-change for calculating duration')
-            day_of_end = 2
-
-        end = datetime.datetime(1980, 5, day_of_end, int(end_hrs), int(end_min), int(end_sec))
-        start = datetime.datetime(1980, 5, 1, int(start_hrs), int(start_min), int(start_sec))
-        duration = end - start
-        duration_in_seconds = duration.seconds
-        assert(duration_in_seconds > 0)
-
-        if qualityindicator == 'Q8C':
-            minimum_expected_file_size = 400000 * duration_in_seconds * TOLERANCE_FACTOR
-        elif qualityindicator == 'Q6A':
-            minimum_expected_file_size = 250000 * duration_in_seconds * TOLERANCE_FACTOR
-        elif qualityindicator == 'Q4A':
-            minimum_expected_file_size = 125000 * duration_in_seconds * TOLERANCE_FACTOR
-        else:
-            logging.warn('Unknown quality indicator prevents file size check: ' + qualityindicator)
-            return
-
-        ## additional check for minimum duration because small videos often produced wrong error messages:
-        if duration_in_seconds > 120 and file_size < minimum_expected_file_size:
-            print('\n       →  ' + colorama.Style.BRIGHT + colorama.Fore.RED +
-                  'ERROR: file size seems to be too small for the given duration ' +
-                  'and quality indicator found (download aborted?): \n' +
-                  ' ' * 10 + 'file size:             ' + "{:,}".format(file_size) + ' Bytes\n' +
-                  ' ' * 10 + 'expected minimum size: ' + "{:,}".format(minimum_expected_file_size) + ' Bytes\n' +
-                  ' ' * 10 + 'duration:  ' + str('%.1f' % (duration_in_seconds/60)) + ' minutes\n' +
-                  ' ' * 10 + 'quality:   ' + qualityindicator + '\n' +
-                  ' ' * 10 + 'file name: ' + oldfilename + colorama.Style.RESET_ALL + '\n')
-            raise(FileSizePlausibilityException('file size is not plausible (too small)'))
-        else:
-            logging.debug('warn_if_ORF_file_seems_to_small_according_to_duration_and_quality_indicator: ' +
-                          'file size (' + "{:,}".format(file_size) +
-                          ') is plausible compared to expected minimum (' +
-                          "{:,}".format(minimum_expected_file_size) +
-                          ')')
-

    def derive_new_filename_from_old_filename(self, oldfilename):
        """
@@ -1286,6 +873,415 @@ class GuessFilename(object):
            move_to_error_dir(dirname, basename)
            return False

+    def adding_tags(self, tagarray, newtags):
+        """
+        Returns unique array of tags containing the newtag.
+
+        @param tagarray: a array of unicode strings containing tags
+        @param newtag: a array of unicode strings containing tags
+        @param return: a array of unicode strings containing tags
+        """
+
+        assert tagarray.__class__ == list
+        assert newtags.__class__ == list
+
+        resulting_tags = tagarray
+
+        for tag in newtags:
+            if tag not in tagarray:
+                resulting_tags.append(tag)
+
+        return resulting_tags
+
+    def split_filename_entities(self, filename):
+        """
+        Takes a filename of format ( (date(time)?)?(--date(time)?)? )? filename (tags)? (extension)?
+        and returns a set of (date/time/duration, filename, array of tags, extension).
+        """
+
+        # FIXXME: return directory as well!
+
+        assert(type(filename) == str or type(filename) == str)
+        assert(len(filename) > 0)
+
+        components = re.match(self.ISO_NAME_TAGS_EXTENSION_REGEX, filename)
+
+        assert(components)
+
+        if components.group(self.TAGS_INDEX):
+            tags = components.group(self.TAGS_INDEX).split(' ')
+        else:
+            tags = []
+        return components.group(self.DAYTIME_DURATION_INDEX), \
+            components.group(self.NAME_INDEX), \
+            tags, \
+            components.group(self.EXTENSION_INDEX)
+
+    def contains_one_of(self, string, entries):
+        """
+        Returns true, if the string contains one of the strings within entries array
+        """
+
+        assert(type(string) == str or type(string) == str)
+        assert(type(entries) == list)
+        assert(len(string) > 0)
+        assert(len(entries) > 0)
+
+        for entry in entries:
+            if entry in string:
+                return True
+
+        return False
+
+    def contains_all_of(self, string, entries):
+        """
+        Returns true, if the string contains all of the strings within entries array
+        """
+
+        assert(type(string) == str or type(string) == str)
+        assert(type(entries) == list)
+        assert(len(string) > 0)
+        assert(len(entries) > 0)
+
+        for entry in entries:
+            if entry not in string:
+                return False
+
+        return True
+
+    def fuzzy_contains_one_of(self, string, entries):
+        """
+        Returns true, if the string contains a similar one of the strings within entries array
+        """
+
+        assert(type(string) == str or type(string) == str)
+        assert(type(entries) == list)
+        assert(len(string) > 0)
+        assert(len(entries) > 0)
+
+        for entry in entries:
+            similarity = fuzz.partial_ratio(string, entry)
+            if similarity > 64:
+                # logging.debug(u"MATCH   fuzzy_contains_one_of(%s, %s) == %i" % (string, str(entry), similarity))
+                return True
+            else:
+                # logging.debug(u"¬ MATCH fuzzy_contains_one_of(%s, %s) == %i" % (string, str(entry), similarity))
+                pass
+
+        return False
+
+    def fuzzy_contains_all_of(self, string, entries):
+        """
+        Returns true, if the string contains all similar ones of the strings within the entries array
+        """
+
+        assert(type(string) == str or type(string) == str)
+        assert(type(entries) == list)
+        assert(len(string) > 0)
+        assert(len(entries) > 0)
+
+        for entry in entries:
+            assert(type(entry) == str or type(entry) == str)
+            # logging.debug(u"fuzzy_contains_all_of(%s..., %s...) ... " % (string[:30], str(entry[:30])))
+            if entry not in string:
+                # if entry is found in string (exactly), try with fuzzy search:
+
+                similarity = fuzz.partial_ratio(string, entry)
+                if similarity > 64:
+                    # logging.debug(u"MATCH   fuzzy_contains_all_of(%s..., %s) == %i" % (string[:30], str(entry), similarity))
+                    pass
+                else:
+                    # logging.debug(u"¬ MATCH fuzzy_contains_all_of(%s..., %s) == %i" % (string[:30], str(entry), similarity))
+                    return False
+
+        return True
+
+    def has_euro_charge(self, string):
+        """
+        Returns true, if the single-line string contains a number with a €-currency
+        """
+
+        assert(type(string) == str or type(string) == str)
+        assert(len(string) > 0)
+
+        components = re.match(self.EURO_CHARGE_REGEX, string)
+
+        if components:
+            return True
+        else:
+            return False
+
+    def get_euro_charge(self, string):
+        """
+        Returns the first included €-currency within single-line "string" or False
+        """
+
+        assert(type(string) == str or type(string) == str)
+        assert(len(string) > 0)
+
+        components = re.match(self.EURO_CHARGE_REGEX, string)
+
+        if components:
+            return components.group(self.EURO_CHARGE_INDEX)
+        else:
+            return False
+
+    def get_euro_charge_from_context_or_basename(self, string, before, after, basename):
+        """
+        Returns the included €-currency which is between before and after
+        strings or within the basename or return 'FIXXME'
+        """
+
+        charge = self.get_euro_charge_from_context(string, before, after)
+        if not charge:
+            charge = self.get_euro_charge(basename)
+            if not charge:
+                return 'FIXXME'
+
+        return charge
+
+    def get_euro_charge_from_context(self, string, before, after):
+        """
+        Returns the included €-currency which is between before and after strings or False
+        """
+
+        assert(type(string) == str or type(string) == str)
+        assert(type(before) == str or type(before) == str)
+        assert(type(after) == str or type(after) == str)
+        assert(len(string) > 0)
+
+        context_range = '5'  # range of characters where before/after is valid
+
+        # for testing: re.search(".*" + before + r"\D{0,6}(\d{1,6}[,.]\d{2})\D{0,6}" + after + ".*", string).groups()
+        components = re.search(".*" + before + r"\D{0," + context_range + "}((\d{1,6})[,.](\d{2}))\D{0," + context_range + "}" + after + ".*", string)
+
+        if components:
+            floatstring = components.group(2) + ',' + components.group(3)
+            # logging.debug("get_euro_charge_from_context extracted float: [%s]" % floatstring)
+            return floatstring
+        else:
+            logging.warning("Sorry, I was not able to extract a charge for this file, please fix manually")
+            logging.debug("get_euro_charge_from_context was not able to extract a float: between [%s] and [%s] within [%s]" % (before, after, string[:30] + "..."))
+            return False
+
+    def rename_file(self, dirname, oldbasename, newbasename, dryrun=False, quiet=False):
+        """
+        Renames a file from oldbasename to newbasename in dirname.
+
+        Only simulates result if dryrun is True.
+
+        @param dirname: string containing the directory of the file
+        @param oldbasename: string containing the old file name (basename)
+        @param newbasename: string containing the new file name (basename)
+        @param dryrun: boolean which defines if files should be changed (False) or not (True)
+        """
+
+        if oldbasename == newbasename:
+            logging.info("Old filename is same as new filename: skipping file")
+            return False
+
+        oldfile = os.path.join(dirname, oldbasename)
+        newfile = os.path.join(dirname, newbasename)
+
+        if not os.path.isfile(oldfile):
+            logging.error("file to rename does not exist: [%s]" % oldfile)
+            return False
+
+        if os.path.isfile(newfile):
+            logging.error("file can't be renamed since new file name already exists: [%s]" % newfile)
+            return False
+
+        if not quiet:
+            print('       →  ' + colorama.Style.BRIGHT + colorama.Fore.GREEN + newbasename + colorama.Style.RESET_ALL)
+        logging.debug(" renaming \"%s\"" % oldfile)
+        logging.debug("      ⤷   \"%s\"" % newfile)
+        if not dryrun:
+            os.rename(oldfile, newfile)
+        return True
+
+    def build_string_via_indexgroups(self, regex_match, indexgroups):
+        """This function takes a regex_match object and concatenates its
+        groups. It does this by traversing the list of indexgroups. If
+        the list item is an integer, the corresponding
+        regex_match.group() is appended to the result string. If the
+        list item is a string, the string is appended to the result
+        string.
+
+        When a list item is a list, its elements are appended as well as
+        long as all list items exist.
+
+        match-groups that are in the indexgroups but are None are ignored.
+
+        @param regex_match: a regex match object from re.match(REGEX, STRING)
+        @param indexgroups: list of strings and integers like [1, '-', 2, '-', 3, 'T', 4, '.', 5, ' foo .png']
+        @param return: string containing the concatenated string
+
+        """
+
+        if not regex_match:
+            logging.error('no re.match object found; please check before calling build_string_via_indexgroups()')
+            return "ERROR"
+
+        def append_element(string, indexgroups):
+            result = string
+            for element in indexgroups:
+                if type(element) == str:
+                    result += element
+                    # print 'DEBUG: result after element [' + str(element)  + '] =  [' + str(result) + ']'
+                elif type(element) == int:
+                    potential_element = regex_match.group(element)
+                    # ignore None matches
+                    if potential_element:
+                        result += regex_match.group(element)
+                        # print 'DEBUG: result after element [' + str(element)  + '] =  [' + str(result) + ']'
+                    else:
+                        # print 'DEBUG: match-group element ' + str(element) + ' is None'
+                        pass
+                elif type(element) == list:
+                    # recursive: if a list element is a list, process if all elements exists:
+                    # print 'DEBUG: found list item = ' + str(element)
+                    # print 'DEBUG:   result before = [' + str(result) + ']'
+                    all_found = True
+                    for listelement in element:
+                        if type(listelement) == int and (regex_match.group(listelement) is None or
+                                                         len(regex_match.group(listelement)) < 1):
+                            all_found = False
+                    if all_found:
+                        result = append_element(result, element)
+                        # print 'DEBUG:   result after =  [' + str(result) + ']'
+                    else:
+                        pass
+                        # print 'DEBUG:   result after =  [' + str(result) + ']' + \
+                        #    '   -> not changed because one or more elements of sub-list were not found'
+            return result
+
+        logging.debug('build_string_via_indexgroups: FILENAME: ' + str(regex_match.group(0)))
+        logging.debug('build_string_via_indexgroups: GROUPS: ' + str(regex_match.groups()))
+        result = append_element('', indexgroups)
+        logging.debug('build_string_via_indexgroups: RESULT:   ' + result)
+        return result
+
+    def NumToMonth(self, month):
+        """
+        Returns the German month name (ASCII transliteration, Austrian 'Jaenner')
+        that is stored at the given position of the internal list of names.
+
+        The numbers 1 to 12 yield 'Jaenner' to 'Dezember'. Position 0 holds
+        'Dezember' as well (presumably so that "month minus one" computations
+        wrap around to December; please verify against the callers).
+
+        @param month: integer which is used as index into the list of month names
+        @param return: string containing the month name
+        """
+
+        month_names = [
+            'Dezember',  # position 0: same name as position 12
+            'Jaenner',
+            'Februar',
+            'Maerz',
+            'April',
+            'Mai',
+            'Juni',
+            'Juli',
+            'August',
+            'September',
+            'Oktober',
+            'November',
+            'Dezember',
+        ]
+        return month_names[month]
+
+    def translate_ORF_quality_string_to_tag(self, quality_string):
+        """
+        Maps a quality key string, as found in the download file names of the
+        ORF company, to the corresponding filetag.
+
+        @param quality_string: string containing the quality key (e.g., 'Q4A', 'Q8C', 'HD')
+        @param return: 'lowquality' for 'Q4A'/'LOW', 'highquality' for 'Q6A'/'Q8C'/'HD',
+                       'UNKNOWNQUALITY' for anything else
+        """
+
+        if quality_string in ('Q4A', 'LOW'):
+            return 'lowquality'
+
+        if quality_string in ('Q6A', 'Q8C', 'HD'):
+            return 'highquality'
+
+        # no known key string matched:
+        return 'UNKNOWNQUALITY'
+
+    def get_file_size(self, filename):
+        """
+        Returns the size of a file in bytes.
+
+        A couple of file names that are used by the unit tests do not exist in
+        the file system. For those names, a hard-coded size is returned instead
+        of asking the file system.
+
+        If the file system can not provide the size, error_exit() is called
+        with error code 10.
+
+        @param filename: string containing the file name to look up
+        @param return: integer containing the file size in bytes
+        """
+
+        # for these test cases, the size does not matter; use a high number
+        # that is above the expected minimum in any case:
+        size_does_not_matter = 99999999
+
+        # hard-coded sizes for the unit test cases:
+        sizes_of_test_files = {
+            '20180510T090000 ORF - ZIB - Signation -ORIGINAL- 2018-05-10_0900_tl_02_ZIB-9-00_Signation__13976423__o__1368225677__s14297692_2__WEB03HD_09000305P_09001400P_Q4A.mp4':
+                size_does_not_matter,
+            '20180510T090000 ORF - ZIB - Weitere Signale der Entspannung -ORIGINAL- 2018-05-10_0900_tl_02_ZIB-9-00_Weitere-Signale__13976423__o__5968792755__s14297694_4__WEB03HD_09011813P_09020710P_Q4A.mp4':
+                size_does_not_matter,
+            '20180520T201500 ORF - Tatort - Tatort_ Aus der Tiefe der Zeit -ORIGINAL- 2018-05-20_2015_in_02_Tatort--Aus-der_____13977411__o__1151703583__s14303062_Q8C.mp4':
+                size_does_not_matter,
+            '20180521T193000 ORF - ZIB 1 - Parlament bereitet sich auf EU-Vorsitz vor -ORIGINAL- 2018-05-21_1930_tl_02_ZIB-1_Parlament-berei__13977453__o__277886215b__s14303762_2__WEB03HD_19350304P_19371319P_Q4A.mp4':
+                size_does_not_matter,
+            '20180608T193000 ORF - Österreich Heute - Das Magazin - Österreich Heute - Das Magazin -ORIGINAL- 13979231_0007_Q8C.mp4':
+                size_does_not_matter,
+            '20190902T220000 ORF - ZIB 2 - Bericht über versteckte ÖVP-Wahlkampfkosten -ORIGINALlow- 2019-09-02_2200_tl_02_ZIB-2_Bericht-ueber-v__14024705__o__71528285d6__s14552793_3__ORF2HD_22033714P_22074303P_Q4A.mp4':
+                size_does_not_matter,
+            '20190902T220000 ORF - ZIB 2 - Hinweis _ Verabschiedung -ORIGINALlow- 2019-09-02_2200_tl_02_ZIB-2_Hinweis---Verab__14024705__o__857007705d__s14552799_9__ORF2HD_22285706P_22300818P_Q4A.mp4':
+                size_does_not_matter,
+            '20180608T170000 ORF - ZIB 17_00 - size okay -ORIGINAL- 2018-06-08_1700_tl__13979222__o__1892278656__s14313181_1__WEB03HD_17020613P_17024324P_Q4A.mp4':
+                5017289,  # from an actual downloaded file
+            '20180608T170000 ORF - ZIB 17_00 - size not okay -ORIGINAL- 2018-06-08_1700_tl__13979222__o__1892278656__s14313181_1__WEB03HD_17020613P_17024324P_Q4A.mp4':
+                4217289,  # manually reduced size from the value of an actual downloaded file
+            '20180608T170000 ORF - ZIB 17_00 - size okay -ORIGINAL- 2018-06-08_1700_tl__13979222__o__1892278656__s14313181_1__WEB03HD_17020613P_17024324P_Q8C.mp4':
+                15847932,  # from an actual downloaded file
+            '20180608T170000 ORF - ZIB 17_00 - size not okay -ORIGINAL- 2018-06-08_1700_tl__13979222__o__1892278656__s14313181_1__WEB03HD_17020613P_17024324P_Q8C.mp4':
+                14050000,  # manually reduced size from the value of an actual downloaded file
+            '20180610T000000 ORF - Kleinkunst - Kleinkunst_ Cordoba - Das Rückspiel (2_2) -ORIGINAL- 2018-06-10_0000_sd_06_Kleinkunst--Cor_____13979381__o__1483927235__s14313621_1__ORF3HD_23592020P_00593103P_Q8C.mp4':
+                1506829698,  # from actual file
+            '2018-06-14_2105_sd_02_Am-Schauplatz_-_Alles für die Katz-_____13979879__o__1907287074__s14316407_7__WEB03HD_21050604P_21533212P_Q8C.mp4':
+                1214980782,  # from actual file
+            '2018-06-14_2155_sd_06_Kottan-ermittelt - Wien Mitte_____13979903__o__1460660672__s14316392_2__ORF3HD_21570716P_23260915P_Q8C.mp4':
+                2231522252,  # from actual file
+            '2018-06-14_2330_sd_06_Sommerkabarett - Lukas Resetarits: Schmäh (1 von 2)_____13979992__o__1310584704__s14316464_4__ORF3HD_23301620P_00302415P_Q8C.mp4':
+                1506983474,  # from actual file
+        }
+
+        if filename in sizes_of_test_files:
+            return sizes_of_test_files[filename]
+
+        # not a unit test case: ask the file system
+        try:
+            file_status = os.stat(filename)
+        except OSError:
+            error_exit(10, 'get_file_size(): Could not get file size of: ' + filename)
+        else:
+            return file_status.st_size
+
+    def warn_if_ORF_file_seems_to_small_according_to_duration_and_quality_indicator(self, oldfilename, qualityindicator,
+                                                                                    start_hrs, start_min, start_sec,
+                                                                                    end_hrs, end_min, end_sec):
+        """
+        Plausibility check: is the file too small for its duration and quality indicator?
+
+        NOTE: this check is currently DISABLED. The method returns None right
+        after this docstring (see the FIXXME notes below). All statements after
+        that early return are unreachable and are described here only for the
+        case that the check gets re-enabled.
+
+        What the disabled code does: it derives the duration from the start
+        and end time-stamps (an end hour that is smaller than the start hour is
+        treated as a day-change), multiplies it with a bytes-per-second value
+        that depends on the quality indicator and with a tolerance factor, and
+        compares the result with the size reported by get_file_size(). For
+        durations longer than 120 seconds and a file size below that minimum,
+        it prints an error message and raises FileSizePlausibilityException.
+
+        Expected size is derived from the detailed time-stamp information
+        and tests with a ten minute file:
+
+        | Quality Indicator       | file size | bytes per second |
+        |-------------------------+-----------+------------------|
+        | Q8C = HD                | 240429907 |           400717 |
+        | Q6A = high quality      | 150198346 |           250331 |
+        | Q4A = low quality       |  74992178 |           124987 |
+
+        @param oldfilename: file name whose size is determined via get_file_size()
+        @param qualityindicator: string; 'Q8C', 'Q6A' and 'Q4A' are handled, anything else only logs a warning
+        @param start_hrs, start_min, start_sec: start time components; each is converted with int()
+        @param end_hrs, end_min, end_sec: end time components; each is converted with int()
+        @param return: None
+        """
+
+        # FIXXME: 2019-08-26: disabled: correct from exception to warning
+        # FIXXME: 2019-09-03: assigned tests also disabled because this function never raises the expected exception
+        # Unconditional early return: everything below this line is dead code.
+        return
+
+        TOLERANCE_FACTOR = 0.95  # To cover edge cases where a reduced file size is feasible
+
+        file_size = self.get_file_size(oldfilename)
+
+        # Only the time of day is known. An end hour smaller than the start hour
+        # is interpreted as "ends on the following day".
+        day_of_end = 1
+        if int(end_hrs) < int(start_hrs):
+            logging.debug('end hours is less than begin hours, adding a day-change for calculating duration')
+            day_of_end = 2
+
+        # The date (1980-05-01 or 1980-05-02) is an arbitrary anchor; only the difference is used.
+        end = datetime.datetime(1980, 5, day_of_end, int(end_hrs), int(end_min), int(end_sec))
+        start = datetime.datetime(1980, 5, 1, int(start_hrs), int(start_min), int(start_sec))
+        duration = end - start
+        duration_in_seconds = duration.seconds
+        assert(duration_in_seconds > 0)
+
+        # Rounded-down bytes-per-second values from the table in the docstring:
+        if qualityindicator == 'Q8C':
+            minimum_expected_file_size = 400000 * duration_in_seconds * TOLERANCE_FACTOR
+        elif qualityindicator == 'Q6A':
+            minimum_expected_file_size = 250000 * duration_in_seconds * TOLERANCE_FACTOR
+        elif qualityindicator == 'Q4A':
+            minimum_expected_file_size = 125000 * duration_in_seconds * TOLERANCE_FACTOR
+        else:
+            # NOTE(review): logging.warn() is a deprecated alias of logging.warning();
+            # switch when this code is re-enabled.
+            logging.warn('Unknown quality indicator prevents file size check: ' + qualityindicator)
+            return
+
+        # additional check for minimum duration because small videos often produced wrong error messages:
+        if duration_in_seconds > 120 and file_size < minimum_expected_file_size:
+            print('\n       →  ' + colorama.Style.BRIGHT + colorama.Fore.RED +
+                  'ERROR: file size seems to be too small for the given duration ' +
+                  'and quality indicator found (download aborted?): \n' +
+                  ' ' * 10 + 'file size:             ' + "{:,}".format(file_size) + ' Bytes\n' +
+                  ' ' * 10 + 'expected minimum size: ' + "{:,}".format(minimum_expected_file_size) + ' Bytes\n' +
+                  ' ' * 10 + 'duration:  ' + str('%.1f' % (duration_in_seconds/60)) + ' minutes\n' +
+                  ' ' * 10 + 'quality:   ' + qualityindicator + '\n' +
+                  ' ' * 10 + 'file name: ' + oldfilename + colorama.Style.RESET_ALL + '\n')
+            raise(FileSizePlausibilityException('file size is not plausible (too small)'))
+        else:
+            logging.debug('warn_if_ORF_file_seems_to_small_according_to_duration_and_quality_indicator: ' +
+                          'file size (' + "{:,}".format(file_size) +
+                          ') is plausible compared to expected minimum (' +
+                          "{:,}".format(minimum_expected_file_size) +
+                          ')')
+

 def move_to_success_dir(dirname, newfilename):
    """