# main.py
#
# This script performs the following tasks:
# - keeps a local json database up to date with
#   the remote json database on twr360.org
# - keeps a local database of xml/rss feeds up
#   to date with the local json database
#
# The script is intended to be run daily on an
# AWS EC2 server.



### DEPENDENCIES ###
import os           # for file handling
import requests     # for making web requests
import json         # for reading/writing json data
import datetime     # for getting current date and for date formatting
import uuid         # for generating podcast guid
import mutagen.mp3  # for reading mp3 file metadata
import io           # for reading mp3 files in bytes
import copy         # for copying dictionaries rather than storing references to shared objects



### VARIABLES ###
# Below are the initializations of the
# variables that are used throughout main.py.

DEVMODE = False
# When False, all filepaths will be configured for the EC2 server.
# When True, all filepaths will use the local directory.
# This variable must be set to False before the script is deployed to production.

LOCAL_DIRECTORY = './'
EC2_DIRECTORY = '/var/www/html/TWR360-XML-Podcast-Feeds/'
# The EC2 directory is used for files in production on the server.
# The local directory is used for developer testing.

root = LOCAL_DIRECTORY if DEVMODE else EC2_DIRECTORY
# The root directory for all file operations is either the local directory
# or the EC2 directory based on the value of DEVMODE.

starttime = datetime.datetime.now()
# This variable stores the time when the program started.
# It is used to terminate the program when it has exceeded its time limit.

currenttime = datetime.datetime.now()
# This variable is updated to the current time at various points in the program
# and is used in determining whether the time limit has been exceeded.

maxhours = datetime.timedelta(hours=3)
# This variable stores the maximum length of time the program is allowed to run.
# The program is designed to save its progress and continue the next day if it exceeds its time limit.

IDs = []
# This list stores the ID numbers of each podcast found
# in the file .\podcasts.txt, telling the program which
# podcasts to keep up-to-date.
# The ID number is stored as a string in the format '{ministry-id}.{language-id}'.
# For example: '66.2' for ministry 66 in Spanish (language 2).

local = {}
# This dictionary stores the current podcast as it
# would appear in a locally stored json file.
#
# The dictionary contains the following keys:
#  - 'ID' (str): The ID number of the podcast in the format defined in the `IDs` variable above.
#  - 'builddate' (str): The last date and time this data was updated in the format '%a, %d %b %Y %H:%M:%S EST'.
#  - 'title' (str): The title of the podcast.
#  - 'description' (str): The description of the podcast.
#  - 'language' (str): The language of the podcast as a two or three letter code.
#  - 'donate-url' (str): The URL to the podcast's donation page.
#  - 'image-url' (str): The URL to the podcast's cover image.
#  - 'referral-url' (str): The URL to the podcast on twr360.org.
#  - 'self-url' (str): The URL of the rss feed.
#  - 'guid' (str): A globally unique id for the podcast, generated as a UUIDv5 of the feed URL ('self-url' below) using the standard podcast namespace uuid ead4c236-bf58-58c6-a2c6-a6b28d128cb6.
#  - 'category' (str): The podcast's category, hardcoded to 'Religion &amp; Spirituality'.
#  - 'subcategory' (str): The podcast's subcategory, hardcoded to 'Christianity'.
#  - 'isexplicit' (str): Whether the podcast contains explicit content, hardcoded to 'false'.
#  - 'author' (str): The podcast's author, hardcoded to 'Trans World Radio 360'.
#  - 'locked' (str): Whether the podcast is prohibited from being used on other platforms, hardcoded to 'no'.
#  - 'type' (str): The podcast's content organization type, hardcoded to 'episodic'.
#  - 'medium' (str): Identifies the RSS feed as a podcast, hardcoded to 'podcast'.
#  - 'episodes' (dict): A dictionary of episodes, where each key is an internally unique episode id number and each value is a dictionary with the following keys:
#      - 'title' (str): The title of the episode.
#      - 'description' (str): The description of the episode.
#      - 'url' (str): The URL of the episode's media file.
#      - 'number' (int): The episode number, starting at 1 for the oldest episode.
#      - 'pubdate' (str): The publication date of the episode in the same format as 'builddate' above.
#      - 'referral-url' (str): The URL to the episode on twr360.org.
#      - 'guid' (str): The episode's internally unique id string.
#      - 'ispermalink' (str): Whether the GUID value is simply the media URL above ('true' or 'false').
#      - 'media-type' (str): The media type of the episode's media file, hardcoded to 'audio/mpeg'.
#      - 'episode-type' (str): The episode type (e.g., 'full', 'trailer', 'bonus'), hardcoded to 'full'.
#      - 'isexplicit' (str): Whether the episode contains explicit content, hardcoded to 'false'.
#      - 'length' (str): The length of the episode's media file in bytes.
#      - 'duration' (str): The duration of the episode's media file in seconds.
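#
# For illustration only, a heavily abbreviated `local` dictionary (all values
# below are hypothetical placeholders) might look like this:
# {
#     "ID": "66.2",
#     "builddate": "Fri, 17 Jan 2025 12:00:00 EST",
#     "title": "Example Podcast Title",
#     "language": "es",
#     ...
#     "episodes": {
#         "123456": {
#             "title": "Example Episode Title",
#             "number": 1,
#             "pubdate": "Fri, 17 Jan 2025 12:00:00 EST",
#             ...
#         }
#     }
# }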

oldlocal = {}
# This dictionary acts as a copy of `local` used in determining whether metadata changes occurred.

episode = {}
# This dictionary is used to store a dictionary found in local['episodes'].
# It exists primarily for ease of use later in the program.

remote = {}
# This dictionary stores the current podcast as it
# appears on twr360.org's remote json database.
# A good example of the dictionary's general format can be
# found at https://www.twr360.org/programs/jsonfeed?ministry_id=1
#
# Some of the important keys found in the dictionary are as follows:
#  - 'total_results' (str): The total number of episodes in the podcast.
#  - 'count' (int): The number of episodes per page in the remote json.
#  - 'items' (list): A list containing each episode. Description below.
#  - 'ministry' (dict): A dictionary containing the podcast's metadata. Description below.
#
# Some of the important keys found in each episode are as follows:
#  - 'id' (str): The id number of the episode, apparently unique across the entire remote database.
#  - 'title' (str): The title of the episode.
#  - 'description' (str): The description of the episode.
#  - 'date' (str): The episode's publish date in the format '%Y-%m-%d %H:%M:%S'.
#  - 'scripture' (list): A list of strings for relevant Scripture passages.
#  - 'audio' (dict): A dictionary with information about the media file, with one important key:
#      - 'shortened_url' (str): The URL to the media file stored on the TWR360.org domain.
#
# Some of the important keys found in remote['ministry'] are as follows:
#  - 'name' (str): The title of the podcast.
#  - 'description' (str): The description of the podcast.
#  - 'image' (str): The URL to the podcast's cover image.
#  - 'donate_link' (str): The URL to the podcast's donation page.
#
# A note on episodes without publication dates:
# Whenever a new episode is added to the remote database, it appears to be added to the very top of the list
# if a correct publication date is given, and added to the very bottom of the list if there is no pubdate.
# Therefore, the algorithm below that compiles the episodes into an RSS feed begins fetching episodes from top
# to bottom until it hits the first episode with no pubdate, after which it only fetches from bottom to top.
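#
# For illustration only, a heavily abbreviated `remote` dictionary (values are
# hypothetical placeholders; see the URL above for a complete real example)
# might look like this:
# {
#     "total_results": "45",
#     "count": 20,
#     "ministry": {"name": "Example Podcast Title", ...},
#     "items": [
#         {
#             "id": "123456",
#             "title": "Example Episode Title",
#             "date": "2025-01-17 12:00:00",
#             "audio": {"shortened_url": "https://..."},
#             ...
#         },
#         ...
#     ]
# }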

ministry = {}
# This dictionary is used to store the dictionary found in remote['ministry'].
# It exists primarily for ease of use later in the program.

item = {}
# This dictionary is used to store a dictionary found in remote['items'].
# It exists primarily for ease of use later in the program.

tasks = {'missing-media-measurements':{}}
# This dictionary stores information about tasks that
# are scheduled to be completed by main.py, representing
# the local json file db\bin\tasks.json.
#
# The dictionary contains the following keys:
#  - 'missing-media-measurements' (dict): A dictionary of episodes missing media length and duration values, where each key is the ID of the podcast in the
#    format defined in the `IDs` variable above, and each value is a list containing each episode id number.
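#
# For example, a `tasks` dictionary in which podcast '66.2' still has two
# episodes awaiting media measurements (episode ids hypothetical) would be:
# {
#     "missing-media-measurements": {
#         "66.2": ["123456", "123457"]
#     }
# }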

langmap = {"165":"tmh","25":"af","62":"en","88":"am","8":"ar","87":"ar","159":"ar","148":"ar","107":"as","60":"bm","123":"en","35":"bn","64":"bs","41":"bg","53":"ht","100":"ceb","127":"ny","40":"hr","44":"cs","140":"da","142":"prs","37":"nl","95":"nl","1":"en","46":"fa","157":"fi","75":"fon","31":"fr","22":"de","38":"el","104":"gn","69":"gu","63":"ha","52":"iw","21":"hi","65":"hu","99":"ig","135":"ilo","32":"in","147":"in","50":"it","30":"ja","114":"jw","92":"dyu","101":"kab","73":"kn","61":"km","126":"ki","125":"rw","143":"rn","36":"ko","103":"ku","109":"gwi","132":"ln","42":"lt","136":"en","108":"bih","176":"mg","158":"ms","71":"ml","3":"zh","97":"zh","150":"zh","96":"zh","149":"zh","152":"en","74":"mr","59":"ne","76":"no","70":"or","124":"om","94":"pap","137":"ps","47":"pl","26":"pt","54":"pa","153":"qu","43":"ro","138":"ro","16":"ru","39":"sr","130":"sn","110":"si","105":"sd","66":"sk","67":"so","2":"es","29":"sw","154":"sw","156":"sw","155":"sw","45":"sv","48":"tl","72":"ta","27":"te","28":"th","34":"tr","122":"tw","51":"uk","131":"pt","77":"ur","33":"vi","98":"vi","151":"wo","134":"xh","129":"en","55":"en","144":"yo","57":"zu"}
# This dictionary maps twr360.org's numeric language ids to their standardized two or three letter language codes.
# The dictionary was generated by scraping the language selection dropdown on the twr360.org homepage.
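# For example, langmap['2'] == 'es', matching the Spanish example given in the `IDs` comment above.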

xml = ''
# This string stores the XML/RSS feed briefly between its generation and being saved to a file.

pagenumber = 0
# This integer is a counter for keeping track of which page of the remote database to fetch.



### FILE STRUCTURE ###
# The following code is essential for when main.py runs on
# the server for the first time, creating the following file
# tree if it doesn't exist already:
# .\db\json
# .\db\rss
# .\db\bin
# .\db\bin\tasks.json
# It is necessary to create this database programmatically rather than
# manually; otherwise, the database would be overwritten whenever this GitHub
# repository is pushed to the EC2 server.

# create the following directories if they don't exist already
os.makedirs(root + 'db/bin', exist_ok=True)
os.makedirs(root + 'db/json', exist_ok=True)
os.makedirs(root + 'db/rss', exist_ok=True)

# create the .\db\bin\tasks.json file if it doesn't exist already
if not os.path.exists(root + 'db/bin/tasks.json'):
    with open(root + 'db/bin/tasks.json', 'w') as file:
        # configure the file to the format defined in the `tasks` dictionary above
        json.dump(tasks, file, indent=4)



### FUNCTIONS ###
# Below are the definitions of functions that are used
# repeatedly through the remainder of the script.

# This function defines the file naming convention for the script.
# It accepts a podcast ID number in the format defined in the `IDs` variable
# above and returns a string similar to 'ministry-66-language-2'.
def filename(ID):
    return f'ministry-{ID.split(".")[0]}-language-{ID.split(".")[1]}'

# This function accepts a podcast ID number in the format defined in the `IDs`
# variable above and a page number, and returns the URL for requesting that
# page from the remote twr360.org json database.
# It also prints the requested URL to the console.
def url_page(ID, page):
    url = f'https://www.twr360.org/programs/jsonfeed?ministry_id={ID.split(".")[0]}&language_id={ID.split(".")[1]}&page={page}'
    print('requested', url)
    return url
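
# For example, url_page('66.2', 4) logs the request and returns
# 'https://www.twr360.org/programs/jsonfeed?ministry_id=66&language_id=2&page=4'.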

# This function accepts a string and returns any integer value found
# at the beginning of the string.
# For example, getnum('123abc456def') = '123'.
# The function is used for parsing the ministry and language id numbers
# out of the podcast URLs found in .\podcasts.txt.
def getnum(string):
    number = ''
    # Adding 'z' ensures there is a non-numeric character at the end of
    # the string to initiate the else clause and return statement.
    for char in string + 'z':
        if char.isnumeric():
            number += char
        else:
            return number
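
# Note that if the string does not begin with a digit, getnum returns the
# empty string; for example, getnum('abc') == ''.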

# This function accepts a `local` dictionary, format
# defined above, and returns the dictionary's data as an
# xml string conforming to RSS 2.0 specifications.
def makexml(local):
    xml = f'''<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:podcast="https://podcastindex.org/namespace/1.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/">
    <channel>
        <title><![CDATA[{local['title']}]]></title>
        <description><![CDATA[{local['description']}]]></description>
        <language>{local['language']}</language>
        <link>{local['referral-url']}</link>
        <lastBuildDate>{local['builddate']}</lastBuildDate>
        <image>
            <url>{local['image-url']}</url>
            <title>{local['title']}</title>
            <link>{local['referral-url']}</link>
        </image>
        <itunes:image href="{local['image-url']}"/>
        <itunes:author>{local['author']}</itunes:author>
        <itunes:type>{local['type']}</itunes:type>
        <itunes:explicit>{local['isexplicit']}</itunes:explicit>
        <itunes:category text="{local['category']}">
            <itunes:category text="{local['subcategory']}"/>
        </itunes:category>
        <podcast:guid>{local['guid']}</podcast:guid>'''
    if local['donate-url'] != '':
        xml += f'''
        <podcast:funding url="{local['donate-url']}">Donate</podcast:funding>'''
    xml += f'''
        <podcast:medium>{local['medium']}</podcast:medium>
        <podcast:locked>{local['locked']}</podcast:locked>
        <atom:link href="{local['self-url']}" rel="self" type="application/rss+xml" />'''
    for episodeID in local['episodes']:
        episode = local['episodes'][episodeID]
        xml += f'''
        <item>
            <title><![CDATA[{episode['title']}]]></title>
            <description><![CDATA[{episode['description']}]]></description>
            <enclosure url="{episode['url']}" length="{episode['length']}" type="{episode['media-type']}"/>
            <pubDate>{episode['pubdate']}</pubDate>
            <link>{episode['referral-url']}</link>
            <guid isPermaLink="{episode['ispermalink']}">{episode['guid']}</guid>
            <podcast:episode>{episode['number']}</podcast:episode>
            <itunes:episode>{episode['number']}</itunes:episode>
            <itunes:episodeType>{episode['episode-type']}</itunes:episodeType>
            <itunes:duration>{episode['duration']}</itunes:duration>
            <itunes:explicit>{episode['isexplicit']}</itunes:explicit>
        </item>'''
    xml += '''
    </channel>
</rss>'''
    return xml



#############################
### EXECUTION BEGINS HERE ###
#############################



### PODCAST IDS ###
# This script opens the local file .\podcasts.txt and parses
# the list of podcast URLs it contains to find the ID number
# of each podcast that main.py will update.

# open the file .\podcasts.txt and iterate through it by line
with open(root + 'podcasts.txt', 'r') as file:
    for line in file:

        # parse the ministry id and language id numbers from the URL
        #
        # The URL can take several forms, including:
        #   - https://www.twr360.org/programs/jsonfeed/ministry_id,66/language_id,2
        #   - https://www.twr360.org/programs/jsonfeed?ministry_id=66&language_id=2
        #   - twr360.org/programs/jsonfeed?ministry_id=66&page=4
        # Parsing the URL for an ID number removes the variability, ensuring
        # consistency and reliability for the remainder of the program.
        ID = ''
        ID += getnum(line.split('ministry_id,')[-1].split('ministry_id=')[-1])
            # The above line finds the number following 'ministry_id,' or 'ministry_id=',
            # whichever is present.
        ID += '.'
        ID += getnum(line.split('language_id,')[-1].split('language_id=')[-1]) if 'language_id' in line else '1'
            # The above line finds the language number after ',' or '=' using the same method
            # as the ministry number, with the added clause to set the language to '1' if a
            # language parameter is not present in the URL.
        IDs.append(ID)
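        # As a worked example, the first two URL forms listed above both parse to
        # the ID '66.2', while the third form, which lacks a language parameter,
        # parses to '66.1'.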

# remove duplicates without altering list order
IDs = list(dict.fromkeys(IDs))



### SYNC JSON DATABASES ###
# The goal of the following loop is to make sure that the local json
# database is up-to-date with the remote TWR json database. The following
# algorithm accomplishes this:
# For each ID in the list of podcast IDs populated above:
#  - Load the locally stored json file into the `local` dictionary (leave `local` empty if no file exists)
#  - Request the first page of that podcast's remote json feed and store it in the `remote` dictionary.
#  - Copy all of the podcast's metadata from the `remote` dictionary to the `local` dictionary.
#     - There are some exceptions for values that should be preserved and some values that must be calculated.
#  - Compare the length of local['episodes'] with remote['total_results']. While the first is less than the second:
#     - Find the first episode in `remote` that is not in `local` (by comparing id numbers) and add it to `local`.
#     - Fetch additional pages of the remote json database only as needed.
#  - Finally, sort `local` and save it as a local json file.

# populate the `tasks` dictionary
#
# The loop below will be updating the `tasks` dictionary with each new episode it finds, so
# it makes the most sense to open the db\bin\tasks.json file here and populate `tasks`.
with open(root + 'db/bin/tasks.json', 'r') as file:
    tasks = json.load(file)

# This loop runs once for each podcast.
for ID in IDs:

    # if a file for this podcast exists in the local json database, load it into the `local` dictionary
    # otherwise, leave the `local` dictionary empty
    #
    # The rest of the loop will behave the same either way, because the metadata is refreshed regardless and
    # any missing episodes are added. Having a locally stored json file simply minimizes the number of requests
    # made to the TWR API in subsequent runs, and also provides a layer of safety by making sure the local['guid']
    # never changes from the first day it is generated.
    if os.path.exists(root + f'db/json/{filename(ID)}.json'):
        with open(root + f'db/json/{filename(ID)}.json', 'r', encoding='utf8') as file:
            local = json.load(file)
    else:
        local = {}

    # make a copy of `local`
    #
    # After data from `remote` has been added into `local`, `local` will be
# compared against `oldlocal` to see if anything changed, to reduce unnecessary file writing.
    oldlocal = copy.copy(local)
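        # Note: a shallow copy suffices here because the comparison below excludes
        # 'episodes', the only nested mutable value; every other compared value is
        # an immutable string or number.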

    # fetch the first page of the podcast from the remote TWR database
    #
    # The remote database is paginated such that each page contains 20 episodes (most recent on top),
    # as well as all of the podcast's metadata.
    with requests.get(url_page(ID, 1)) as response:
        remote = json.loads(response.text)

    # load the dictionary in remote['ministry'] into the `ministry` dictionary
    #
    # The `remote` dictionary is expected to have a key named 'ministry' in which a dictionary of the podcast's
    # metadata is stored. However, tests have found one or more isolated occasions in which the 'ministry' key
    # was instead named 'church'. The if...else statement here addresses that issue.
    ministry = remote['ministry'] if 'ministry' in remote else remote['church']

    # use `remote` to populate the metadata in `local`
    #
    # At this point, `local` may either be blank or already populated, and the following code addresses both cases
    # seamlessly because of our decision to refresh all the metadata either way.
    # The values in `local` are formatted here in a way that makes conversion to XML/RSS as straightforward as possible.
    local['ID'] = ID
    local['total-episodes'] = 0
        # This number will be calculated after the episodes have been updated.
        # It exists only for the developer to easily see how many episodes the file contains.
    local['empty-episodes'] = local['empty-episodes'] if 'empty-episodes' in local else 0
        # This stores the number of episodes found without any audio content. Such empty episodes will not be added to
        # local['episodes'], so this value is necessary to ensure the while loop below terminates correctly.
        # It is stored in `local` so that it can be viewed in the json file for debug purposes.
    local['builddate'] = datetime.datetime.now().strftime('%a, %d %b %Y %H:%M:%S EST')
        # An example of this date format is 'Fri, 17 Jan 2025 12:00:00 EST'
    local['title'] = ministry['name']
    local['description'] = ministry['description']
    local['language'] = langmap[ID.split('.')[1]]
        # Recall that the number trailing the '.' in an ID number is the podcast's language id number.
        # The `langmap` dictionary is used to convert this number to a letter code.
    local['donate-url'] = ministry['donate_link']
    local['image-url'] = ministry['image']
    local['referral-url'] = f'https://www.twr360.org/ministry/{ID.split(".")[0]}?lang={ID.split(".")[1]}'
        # An example of the URL scheme here is https://www.twr360.org/ministry/66?lang=2
    local['self-url'] = f'https://podcast-feeds.twr360.net/db/rss/{filename(ID)}.xml'
        # An example of the URL scheme here is https://podcast-feeds.twr360.net/db/rss/ministry-66-language-2.xml
    local['guid'] = local['guid'] if 'guid' in local else str(uuid.uuid5(uuid.UUID('ead4c236-bf58-58c6-a2c6-a6b28d128cb6'), local['self-url']))
        # If `local` was blank, a guid is generated. If `local` already has a guid, it is not altered, which is crucial.
        # This safeguard will come into play if the local['self-url'] naming scheme is changed during production.
    local['category'] = 'Religion &amp; Spirituality'
    local['subcategory'] = 'Christianity'
    local['isexplicit'] = 'false'
    local['author'] = 'Trans World Radio 360'
    local['locked'] = 'no'
    local['type'] = 'episodic'
    local['medium'] = 'podcast'
    local['episodes'] = local['episodes'] if 'episodes' in local else {}
        # If `local` was blank, this line creates the empty dictionary for episodes.

    # compare `local` and `oldlocal` while excluding the keys in the list below
    excluded_keys = ['builddate','episodes','total-episodes']
    metadata_changed = {k:v for k,v in local.items() if k not in excluded_keys} != {k:v for k,v in oldlocal.items() if k not in excluded_keys}

    # add all missing episodes
    #
    # This loop looks for new episodes in `remote` starting from the most recent (an episode is determined "new" if its id number is
    # not found as a key in local['episodes']) and adds them to local['episodes'] until len(local['episodes']) is equal to the value
    # of remote['total_results'].
    # Several variables are initialized below before the loop begins:
    pubdates = True
        # Episodes with publication dates are sorted most-recent-on-top in the remote database.
        # Episodes without publication dates appear to be sorted oldest-on-top, at the bottom of the list.
        # This value starts as True, which assumes that the podcast has pubdates, at least on top; it
        # is changed to False as soon as an episode without a pubdate is found.
    pagenumber = 2
        # This variable stores the number of the next page that will need to be fetched from the remote database.
        # Page 1 is fetched above, page 2 will be fetched next, and the while loop below increments the number each time.
        # If the loop changes `pubdates` to False, the page number will be reset to the last page and decremented from there.
    i = -1
        # This variable holds the index of which episode in the list remote['items'] is currently being reviewed.
        # It is set to -1 so that it can be incremented at the beginning of the loop, which is useful because of the `continue` statements below.
    while len(local['episodes']) < int(remote['total_results']) - local['empty-episodes']:
        i += 1

        # fetch additional pages if necessary
        #
        # The remote database is paginated such that each page only contains 20 episodes, starting with the most recent. That number
        # 20 is contained in remote['count'], so that value is used here in case the TWR360.org API is altered in the future.
        # The "if" statement here checks whether the variable `i` would be an out-of-range index for the list of episodes in `remote`;
        # if so, the next needed page number is calculated, the page is fetched, and the next 20 episodes are appended to `remote`.
        # However, if the episodes have no publication dates, the remote database is in reverse, so pages are fetched back to front.
        if i >= len(remote['items']):
            with requests.get(url_page(ID, pagenumber)) as response:
                if pubdates:
                    remote['items'] = remote['items'] + json.loads(response.text)['items']
                    pagenumber += 1
                else:
                    remote['items'] = json.loads(response.text)['items'] + remote['items']
                    pagenumber -= 1

        # store the current remote episode in an easily manageable dict variable
        #
        # If the podcast has no publication dates, index from the end of the list:
        # i = 0 maps to the last item, i = 1 to the second-to-last, and so on.
        if pubdates:
            item = remote['items'][i]
        else:
            item = remote['items'][-(i + 1)]

        # ensure that the episode is new
        #
        # If the id number of the episode in `remote` is already present in local['episodes'], skip this
        # iteration of the while loop, thus starting again with the next episode in the list.
        if item['id'] in local['episodes']:
            continue

        # ensure that the episode has audio content
        #
        # This script only supports audio podcasts, so any episode without audio should be skipped.
        # Should audio be added later, this item will still be considered an episode, so there is
        # no need to alter the values of `i` or remote['total_results']. In effect, a gap is left
        # in the episode numbering, which allows the episode to be inserted later without renumbering.
        # The value of local['empty-episodes'] will be incremented to ensure the loop terminates correctly.
        if not item['audio']:
            local['empty-episodes'] += 1
            continue

        # format a new episode for `local`
        episode['title'] = item['title']
        episode['description'] = item['description'] if item['description'] is not None else ', '.join(item['scripture'])
            # Many episodes do not have descriptions. A comma-separated list of the related Scriptures is a nice alternative.
        if not pubdates:
            episode['pubdate'] = 0
        else:
            try:
                episode['pubdate'] = datetime.datetime.strptime(item['date'], '%Y-%m-%d %H:%M:%S').strftime('%a, %d %b %Y %H:%M:%S EST')
            except (KeyError, TypeError, ValueError):
                # As soon as the program finds an episode without a pubdate, it changes the value of `pubdates` to False and continues
                # getting episodes, but now from the end of the list. The following statements force the next iteration of the loop to
                # begin at the last page of the remote database.
                pubdates = False
                # undo this iteration's increment of the episode counter
                i -= 1
                # delete the remaining episodes in remote['items']
                while len(remote['items']) > i + 1:
                    del remote['items'][-1]
                # calculate the number of the last page
                totalpages = int(remote['total_results']) / int(remote['count'])
                pagenumber = int(totalpages) + (totalpages % 1 > 0)
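                    # For example, 45 total results at 20 per page gives totalpages = 2.25,
                    # so the last page number is int(2.25) + 1 = 3.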
            if not pubdates:
                # discard this episode draft, because it belongs at the end of the `local` list
                continue
        episode['url'] = item['audio']['shortened_url']
            # There are multiple available URLs for this, but 'shortened_url' is one that leads to the TWR360.org domain.
        episode['number'] = int(remote['total_results']) - i
        episode['referral-url'] = f'https://www.twr360.org/programs/view/id,{item["id"]}/lang,{ID.split(".")[1]}'
            # An example of the URL scheme here is https://www.twr360.org/programs/view/id,672109/lang,2
        episode['guid'] = episode['url']
        episode['ispermalink'] = 'true'
        episode['media-type'] = 'audio/mpeg'
        episode['episode-type'] = 'full'
        episode['isexplicit'] = 'false'
        episode['length'] = 0
        episode['duration'] = 0
            # Length and duration values will be calculated at a later time because it is a time-consuming process.


        # append the new episode to `local`
        local['episodes'][item['id']] = copy.copy(episode)
            # The copy.copy() prevents every episode from being a reference to the same global variable.

        # add episode id number to list of episodes missing media measurements
        #
        # If the episode being added was from ministry-66 language-2 with an id of 123456, it should appear in the
        # tasks['missing-media-measurements'] list as the following key:value pair:
        #   '66.2' : ['123456', ...(other episode ids from the same podcast)]
        # This 'missing-media-measurements' list in `tasks` keeps track of each episode in the local json database
        # that does not yet have its length and duration values calculated.
        if ID not in tasks['missing-media-measurements']:
            tasks['missing-media-measurements'][ID] = []
                # This adds the podcast ID as a dictionary key if it's not already present in `tasks`.
        if item['id'] not in tasks['missing-media-measurements'][ID]:
            tasks['missing-media-measurements'][ID].append(item['id'])
                # This adds the episode id number to the podcast's list if it's not already present.

    # save changes only if something has changed (i will be >= 0 if the episode loop above ran at all)
    if metadata_changed or i >= 0:

        # sort episodes by episode['number'], largest to smallest (namely, most recent on top)
        local['episodes'] = dict(sorted(local['episodes'].items(), key=lambda item: item[1]['number'], reverse=True))

        # calculate the number of total episodes
        local['total-episodes'] = len(local['episodes'])

        # write local xml feed
        #
        # Now that all the podcast data is in the correct form in the `local` dictionary, it can be easily converted
        # to an XML/RSS document using the function declared nearer the top of main.py.
        # Every time the XML/RSS string is created in this way, it is completely regenerated rather than edited, which
        # effectively avoids the hassle of ever parsing the XML manually.
        xml = makexml(local)

        # record the current time as the last build date
        local['builddate'] = datetime.datetime.now().strftime('%a, %d %b %Y %H:%M:%S EST')

        # save json and xml files
        with open(root + f'db/json/{filename(ID)}.json', 'w', encoding='utf8') as file:
            json.dump(local, file, indent=4, ensure_ascii=False)
        with open(root + f'db/rss/{filename(ID)}.xml', 'w', encoding='utf8') as file:
            file.write(xml)
        with open(root + 'db/bin/tasks.json', 'w') as file:
            json.dump(tasks, file, indent=4)
                # Saving tasks.json here is redundant but useful if an error occurs within this loop.

        # log saving of files
        print('saved', root + f'db/json/{filename(ID)}.json')
        print('saved', root + f'db/rss/{filename(ID)}.xml')
        print('saved', root + 'db/bin/tasks.json')

    # check time limit after every podcast
    #
    # This is a safe place to terminate execution of the loop because all progress has been saved.
    currenttime = datetime.datetime.now()
    if starttime + maxhours < currenttime:
        print('time limit exceeded')
        break



### HTML SNIPPET ###
# After all podcasts have been updated, this html document is generated to show the
# end user the URL for each completed XML/RSS feed.

# get all xml files in the directory
rss_path = 'db/rss'
xml_files = [f for f in os.listdir(root + rss_path) if f.endswith('.xml')]

# generate an html snippet
snippet = f'''<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Feeds</title>
  <style>
    body {{ font-family: Arial, sans-serif; margin: 20px;}}
    h1 {{ color: #333;}}
    ul {{list-style-type: none; padding: 0;}}
    li {{ margin: 10px 0;}}
    a {{ text-decoration: none; color: #007BFF;}}
    a:hover {{ text-decoration: underline;}}
  </style>
</head>
<body>
  <h1>Feeds</h1>
  <p>The following XML feeds are contained inside the <code>db/rss</code> folder. Each link placed inside the <code>podcasts.txt</code> file will be converted from JSON into an XML feed link.</p>
  <ul>
'''
for xml_file in xml_files:
    snippet += f'    <li><a href="{rss_path}/{xml_file}">https://podcast-feeds.twr360.net/db/rss/{xml_file}</a></li>\n'
snippet += '''  </ul>
</body>
</html>'''

# save html snippet
with open(root + 'feeds_snippet.html', 'w') as file:
    file.write(snippet)

# log saving file
print('saved', root + 'feeds_snippet.html')



### MEDIA MEASUREMENTS ###
# Updating a podcast requires only one web request per 20 episodes. However, measuring the media files of the episodes
# requires one additional web request per episode. This is undesirable because it is very time consuming, and if the list
# in podcasts.txt is very long, it may prevent main.py from completing its execution within 24 hours.
# To address this issue, db\bin\tasks.json has been designated to keep a list of each episode still waiting on measurements
# so that they can be calculated on a later date if necessary.
# In the following script, ID denotes a podcast id (i.e. '66.2') while eid denotes an episode id (i.e. '123456').

# This loop runs once for each podcast that has missing measurements.
for ID in tasks['missing-media-measurements']:

    # skip this podcast if its episode list is empty
    if tasks['missing-media-measurements'][ID] == []:
        continue

    # populate `local` with the data from the local json file
    with open(root + f'db/json/{filename(ID)}.json', 'r', encoding='utf8') as file:
        local = json.load(file)

    # loop once for each episode in the podcast
    for i in range(len(tasks['missing-media-measurements'][ID])):

        # get the id of the first episode in the list and remove it from `tasks`
        eid = tasks['missing-media-measurements'][ID].pop(0)

        # download the remote media file
        with requests.get(local['episodes'][eid]['url']) as response:

            # log web request
            print('downloaded', local['episodes'][eid]['url'], 'for', ID, '(', len(tasks['missing-media-measurements'][ID]), 'remaining )')

            # transform the web response into a bytes object for analysis
            media = io.BytesIO(response.content)

            # get the position of the last byte in the file, which equals the byte length
            local['episodes'][eid]['length'] = media.seek(0, io.SEEK_END)

            # use the media file's metadata to find the duration in seconds
            local['episodes'][eid]['duration'] = int(mutagen.mp3.MP3(media).info.length)

        # save progress and check time limit every 10 episodes and at the end of the podcast
        #
        # Saving progress every 10 episodes limits the amount of lost work if the internet
        # connection goes down in the middle of downloading media for a long podcast.
        if (i+1)%10 == 0 or len(tasks['missing-media-measurements'][ID]) == 0:

            # rewrite local xml feed to include new measurements
            xml = makexml(local)

            # record the current time as the last build date
            local['builddate'] = datetime.datetime.now().strftime('%a, %d %b %Y %H:%M:%S EST')

            # save json and xml files
            with open(root + f'db/json/{filename(ID)}.json', 'w', encoding='utf8') as file:
                json.dump(local, file, indent=4, ensure_ascii=False)
            with open(root + f'db/rss/{filename(ID)}.xml', 'w', encoding='utf8') as file:
                file.write(xml)
            with open(root + 'db/bin/tasks.json', 'w') as file:
                json.dump(tasks, file, indent=4)
                    # Saving tasks.json here is redundant but useful if an error occurs within this loop.

            # log saving of files
            print('saved', root + f'db/json/{filename(ID)}.json')
            print('saved', root + f'db/rss/{filename(ID)}.xml')
            print('saved', root + 'db/bin/tasks.json')

            # check time limit
            currenttime = datetime.datetime.now()
            if starttime + maxhours < currenttime:
                print('time limit exceeded')
                break

    # break the loop if the time limit was already exceeded
    if starttime + maxhours < currenttime:
        break

# Soli Deo Gloria
