Introduction

An MPEG-TS (MPEG transport stream) file uses a container format designed to transmit video, audio and other types of data in digital broadcasting systems, such as digital terrestrial television (DTT). HbbTV (Hybrid Broadcast Broadband TV) is a specification that combines traditional broadcast television with internet-based services. HbbTV uses standard web technologies such as HTML, JavaScript and CSS to provide an interactive experience for users.

The Application Information Table (AIT) is an important table carried in the MPEG-TS and used specifically in the context of HbbTV. The AIT provides information about the interactive applications available for a particular digital television service. It contains metadata related to the applications, such as their name, description, application type, launch URL, etc.

A Python program has been developed that, based on the public specification ETSI TS 102 809 V1.1.1 (Digital Video Broadcasting, DVB), extracts, for each service (channel) in the MPEG-TS file, the URL of the web page that hosts the service, for subsequent analysis.

Use of the application

To use it, simply pass the path to the desired file as the command-line argument. The file can be a recording made with the SmartDVB program. The following is an example run:

$ python get_hbbtv_url_from_ts.py record.ts
Canal: http://service.com/hbbtv/index.xhtml

Source code

import sys

def get_packet(stream, pid):
    """Return the first 188-byte transport packet in ``stream`` with ``pid``.

    The packet identifier is a 13-bit value: the low five bits of header
    byte 1 followed by all of header byte 2.  Both bytes are compared, so
    PIDs above 255 are matched correctly and never confused with a PID
    that merely shares the same low byte.

    Args:
        stream: ``bytes`` holding consecutive 188-byte transport packets.
        pid: full 13-bit packet identifier to look for.

    Returns:
        The matching packet as ``bytes``, or ``0`` when no packet matches.
    """
    packet_size = 188
    # Walk whole packets only; a trailing partial packet is ignored.
    for start in range(0, len(stream) - packet_size + 1, packet_size):
        packet = stream[start:start + packet_size]
        packet_pid = ((packet[1] & 0x1F) << 8) | packet[2]
        if packet_pid == pid:
            return packet
    # not found, return 0
    return 0


def get_pid_from_packet(pat):
    """Return the program-map PID announced by a PAT section.

    ``pat`` must start at the section's ``table_id`` byte.  The section
    header occupies 8 bytes and is followed by 4-byte entries made of a
    16-bit program number and a 13-bit PID.  Entries whose program number
    is 0 point to the network information table rather than to a PMT, so
    they are skipped.  If no other entry is visible, the PID of the first
    entry is returned.

    Args:
        pat: ``bytes`` of the PAT section (the caller passes 16 bytes).

    Returns:
        The 13-bit PID as an ``int``.

    Raises:
        IndexError: if ``pat`` is too short to hold one entry.
    """
    section_length = int.from_bytes(pat[1:3], 'big') & 0x0FFF
    # Entries stop where the 4-byte CRC begins; never read past ``pat``.
    entries_end = min(3 + section_length - 4, len(pat))
    for offset in range(8, entries_end - 3, 4):
        program_number = int.from_bytes(pat[offset:offset + 2], 'big')
        if program_number != 0:
            return int.from_bytes(pat[offset + 2:offset + 4], 'big') & 0x1FFF
    # Fall back to the first entry (bytes 11-12 of the section).
    return ((pat[10] & 0x1F) << 8) | pat[11]


def _pmt_length_field(field):
    """Return the low 10 bits of a big-endian 2-byte length field."""
    # Masking the integer works whatever the upper (flag/reserved) bits
    # are, including when they are all zero or the field is empty.
    return int.from_bytes(field, 'big') & 0x03FF


def get_programs_pmt(pmt):
    """Split the elementary-stream loop of a PMT packet into its entries.

    ``pmt`` is a whole transport packet: 4 header bytes, 1 pointer byte,
    then the section (table id, 2-byte section length, 5 further header
    bytes, PCR PID, program-info length, program descriptors, stream
    entries and a 4-byte CRC).

    Args:
        pmt: ``bytes`` of the transport packet carrying the PMT section.

    Returns:
        A list of ``bytes`` objects, one per stream entry.  Each entry
        holds the stream type (1 byte), the elementary PID (2 bytes), the
        ES-info length (2 bytes) and that many descriptor bytes.
    """
    # get bytes length header pmt
    section_length = _pmt_length_field(pmt[6:8])

    # Data from the PCR PID up to, but excluding, the CRC.
    section_body = pmt[(4 + 1 + 3) + 5:(4 + 1 + 3) + section_length - 4]

    # ignore program info
    program_info_length = _pmt_length_field(section_body[2:4])
    remaining = section_body[4 + program_info_length:]

    programs = []
    while remaining:
        entry_size = 5 + _pmt_length_field(remaining[3:5])
        programs.append(remaining[:entry_size])
        remaining = remaining[entry_size:]  # drop the consumed entry

    return programs


def get_pid_ait_program(programs):
    """Return the PID of the first private-section stream (type 5).

    Each entry starts with the stream type (1 byte) followed by the
    elementary PID in the low 13 bits of the next two bytes.

    Args:
        programs: list of PMT stream entries as ``bytes``.

    Returns:
        The 13-bit PID as an ``int``, or ``None`` when no entry has
        stream type 5.
    """
    for program in programs:
        if program[0] == 5:
            return ((program[1] & 0x1F) << 8) | program[2]
    return None


def get_ait_packet(stream, pid):
    """Collect the payload of the first complete section sent on ``pid``.

    Collection starts at the first packet of ``pid`` whose
    payload_unit_start_indicator (bit 0x40 of header byte 1) is set and
    stops just before the next such packet, so the result holds exactly
    one section start plus its continuation packets.  The full 13-bit PID
    is compared, not only its low byte.

    Args:
        stream: ``bytes`` holding consecutive 188-byte transport packets.
        pid: 13-bit packet identifier of the AIT stream.

    Returns:
        The concatenated payloads (each packet minus its 4-byte header)
        as ``bytes``; ``b''`` when no section start is found.
    """
    packet_size = 188
    payloads = []
    for start in range(0, len(stream) - packet_size + 1, packet_size):
        packet = stream[start:start + packet_size]
        if (((packet[1] & 0x1F) << 8) | packet[2]) != pid:
            continue

        if packet[1] & 0x40:
            # A second section start ends the section being collected.
            if payloads:
                break
        elif not payloads:
            # Continuation of a section whose start was not seen.
            continue

        # strip packet header
        payloads.append(packet[4:])
    return b''.join(payloads)


def get_url_ait(packet):
    """Build the application URL from the descriptors of an AIT payload.

    The first 22 bytes (12 bytes of section header plus 10 bytes of
    application header) are skipped.  The remaining descriptors are then
    walked as tag/length/value records until both of these were seen:

    * tag 0x02: the URL base, whose own length is stored in byte 5 of the
      descriptor and whose text starts at byte 6;
    * tag 0x15: the initial path, which fills the whole descriptor body.

    Reading the URL base through its own length byte keeps the result
    correct when URL extensions follow the base inside the descriptor.

    Args:
        packet: ``bytes`` payload as returned by ``get_ait_packet``.

    Returns:
        URL base and initial path concatenated, as ``str``.

    Raises:
        IndexError: if the data ends before both descriptors are found.
        UnicodeDecodeError: if either part is not ASCII.
    """
    # ignore header (12 bytes) and application header (10 bytes)
    descriptors = packet[12 + 10:]
    domain = None
    path = None

    while domain is None or path is None:
        tag = descriptors[0]
        length = descriptors[1]
        descriptor_end = 2 + length
        if tag == 0x15:
            path = descriptors[2:descriptor_end]
        elif tag == 0x2:
            # Never read past the end of this descriptor.
            url_end = min(6 + descriptors[5], descriptor_end)
            domain = descriptors[6:url_end]
        # Every other tag is skipped.
        descriptors = descriptors[descriptor_end:]

    return domain.decode('ascii') + path.decode('ascii')

def get_program_sid(pmt):
    """Return the 16-bit program number (service id) of a PMT packet.

    The program number occupies bytes 8 and 9 of the packet: 4 header
    bytes, 1 pointer byte and 3 bytes of table id and section length
    come before it.

    Args:
        pmt: ``bytes`` of the transport packet carrying the PMT section.

    Returns:
        The program number as an ``int``.
    """
    return (pmt[4 + 1 + 3] << 8) | pmt[4 + 1 + 4]


def get_name_from_sdt(ts, pid):
    """Return the service name stored in the SDT for a service id.

    The first packet on PID 0x11 is fetched with ``get_packet`` and its
    service loop is walked.  Each entry is a 16-bit service id, one flag
    byte and a 12-bit descriptor-loop length, followed by descriptors.
    The first descriptor of the matching entry is read as a service
    descriptor: tag, length, service type, provider-name length,
    provider name, service-name length, service name.

    Args:
        ts: ``bytes`` of the whole transport stream.
        pid: the 16-bit service id to look up (despite its name, this is
            not a packet identifier).

    Returns:
        The service name decoded as ASCII.

    Raises:
        IndexError: if the service id is not present in that packet.
        UnicodeDecodeError: if the name is not ASCII.
    """
    sdt = get_packet(ts, 0x11)
    # Skip 4 header bytes, 1 pointer byte and 11 section-header bytes.
    services = sdt[4 + 1 + 11:]

    while True:
        service_id = (services[0] << 8) | services[1]
        if service_id == pid:
            provider_length = services[8]
            name_start = 10 + provider_length
            name_length = services[name_start - 1]
            name = services[name_start:name_start + name_length]
            return name.decode('ascii')
        # Jump over this entry: 5 fixed bytes plus its descriptor loop.
        descriptors_length = ((services[3] & 0x0F) << 8) | services[4]
        services = services[5 + descriptors_length:]

# The recording path is the only command-line argument.
if len(sys.argv) < 2:
    sys.exit("Usage: python " + sys.argv[0] + " <file.ts>")

# get ts file
print("Opening " + sys.argv[1])
# The context manager closes the file as soon as it has been read.
with open(sys.argv[1], "rb") as ts_file:
    ts = ts_file.read()

# search pat packet
# pat pid = 0; skip the 4-byte packet header and the pointer byte,
# then keep the first 16 bytes of the pat section
pat = get_packet(ts, 0)[5:21]

# get program map pid
map_pid = get_pid_from_packet(pat)

# search pmt packet
pmt = get_packet(ts, map_pid)

# get pmt programs
programs_pmt = get_programs_pmt(pmt)

# get ait pid program
pid_ait = get_pid_ait_program(programs_pmt)

# get ait packet
ait_packet = get_ait_packet(ts, pid_ait)

# program sid
p_sid = get_program_sid(pmt)

# print "<service name>: <application url>"
print(get_name_from_sdt(ts, p_sid) + ": " + get_url_ait(ait_packet))