Introduction
MPEG-TS (MPEG transport stream) is a container format used to transmit video, audio and other types of data in digital broadcasting systems, such as digital terrestrial television (DTT). HbbTV (Hybrid Broadcast Broadband TV) is a specification that combines traditional broadcast television with internet-based services. HbbTV uses standard web technologies such as HTML, JavaScript and CSS to provide this interactive experience for users.
The Application Information Table (AIT) is an important table carried in the MPEG-TS and used specifically in the context of HbbTV. The AIT provides information about the interactive applications available for a particular digital television service. It contains metadata about the applications, such as their name, description, application type and launch URL.
A Python program has been developed that, based on the public specification ETSI TS 102 809 V1.1.1 (Digital Video Broadcasting, DVB), extracts, for each service (channel) in the MPEG-TS file, the URL of the web page that hosts the service, for subsequent analysis.
Using the application
To use it, pass the path to the desired file as the command-line argument. The file can be a recording made with the SmartDVB program. Here is an example run:
$ python get_hbbtv_url_from_ts.py record.ts
Canal: http://service.com/hbbtv/index.xhtml
Source code
import sys
def get_packet(stream, pid):
    """Return the first transport-stream packet whose third byte equals *pid*.

    The stream is scanned as consecutive 188-byte packets starting at
    offset 0; a trailing partial packet is ignored.

    NOTE(review): only byte 2 of each packet is compared, i.e. the low
    8 bits of the 13-bit PID field. The upper 5 bits (stored in byte 1) are
    not checked, so packets whose PIDs differ only in those bits are not
    told apart. The other helpers in this file hand over single-byte
    values, so widening the comparison has to be done in all of them
    together.

    Args:
        stream: Raw transport-stream contents (bytes-like).
        pid: Value compared against byte 2 of every packet.

    Returns:
        The matching 188-byte packet, or 0 when no packet matches.
    """
    packet_size = 188
    # Walk whole packets using integer arithmetic only (no float division),
    # so the packet count is exact for any stream length.
    for start in range(0, len(stream) - packet_size + 1, packet_size):
        packet = stream[start:start + packet_size]
        if packet[2] == pid:
            return packet
    # 0 is the established "not found" sentinel of this function.
    return 0
def get_pid_from_packet(pat):
    """Return the byte at index 11 (the 12th byte) of the PAT slice *pat*.

    NOTE(review): with the slice the script passes in (packet bytes 5..20)
    this is presumably the low byte of the PID stored in the first PAT
    entry; the upper 5 bits of that 13-bit PID are not returned. Confirm
    against the PAT layout before relying on it for PIDs above 0xFF.

    Args:
        pat: Bytes-like PAT data, at least 12 bytes long.

    Returns:
        The integer value of ``pat[11]``.

    Raises:
        IndexError: If *pat* is shorter than 12 bytes.
    """
    return pat[11]
def get_programs_pmt(pmt):
    """Split the elementary-stream loop of a PMT packet into raw entries.

    The packet is expected to hold a 4-byte TS header, a 1-byte pointer
    field and then the PMT section, so the two bytes carrying the section
    length sit at indices 6 and 7.

    Length fields are read as the low 10 bits of a big-endian 16-bit value
    by masking. The result therefore does not depend on the reserved/flag
    bits in the top of the field being set.

    Only the data present in this single packet is used. An entry whose
    announced length runs past the available data is returned truncated,
    and a trailing fragment shorter than the 5-byte entry header is dropped.

    Args:
        pmt: One 188-byte TS packet containing the start of a PMT section.

    Returns:
        A list of bytes objects, one per elementary-stream entry. Each
        holds the 5-byte entry header (stream type, two PID bytes, two
        length bytes) followed by its descriptor bytes.
    """
    length_mask = 0x03FF  # low 10 bits of a 16-bit length field
    section_body_start = 4 + 1 + 3  # TS header + pointer + table_id/length

    section_length = int.from_bytes(pmt[6:8], 'big') & length_mask

    # Skip the 5 bytes that follow the length field (program number,
    # version and section numbers) and drop the 4-byte CRC at the end.
    body = pmt[section_body_start + 5:section_body_start + section_length - 4]

    # Bytes 0-1 of the body are skipped; bytes 2-3 give the size of the
    # program-level descriptors, which are ignored.
    program_info_length = int.from_bytes(body[2:4], 'big') & length_mask
    components = body[4 + program_info_length:]

    entry_header = 5
    programs = []
    while len(components) >= entry_header:
        es_info_length = int.from_bytes(components[3:5], 'big') & length_mask
        entry_end = entry_header + es_info_length
        programs.append(components[:entry_end])
        components = components[entry_end:]
    return programs
def get_pid_ait_program(programs):
    """Return byte 2 of the first entry whose first byte (stream type) is 5.

    NOTE(review): stream type 5 is taken to mark the AIT component, and
    byte 2 is the low byte of that component's PID (the upper bits in
    byte 1 are not returned) - confirm for streams that carry other
    private-section components or PIDs above 0xFF.

    Args:
        programs: Iterable of raw elementary-stream entries (bytes-like),
            as produced by ``get_programs_pmt``.

    Returns:
        The integer value of byte 2 of the first matching entry, or None
        when no entry matches. Entries shorter than 3 bytes are skipped.
    """
    for program in programs:
        # Truncated entries cannot carry a PID byte; skip them instead of
        # failing with IndexError.
        if len(program) >= 3 and program[0] == 5:
            return program[2]
    return None
def get_ait_packet(stream, pid):
    """Concatenate the payloads of the first section sent on *pid*.

    The stream is scanned as consecutive 188-byte packets. Matching packets
    are ignored until one has its payload_unit_start_indicator bit (0x40 in
    byte 1) set. From there the bytes after the 4-byte TS header of every
    matching packet are collected, until the next matching packet with that
    bit set.

    The start bit is tested with a mask, so the check does not depend on
    the other bits of byte 1 (error/priority flags and the upper PID bits).

    NOTE(review): like the other helpers in this file, the match uses only
    byte 2 (the low 8 bits of the PID). Packets are also assumed to carry
    no adaptation field, since everything after the fixed 4-byte header is
    treated as payload.

    Args:
        stream: Raw transport-stream contents (bytes-like).
        pid: Value compared against byte 2 of every packet.

    Returns:
        The collected payload bytes, starting with the pointer field of the
        first packet; ``b''`` when no starting packet is found.
    """
    packet_size = 188
    payload_unit_start = 0x40
    chunks = []
    for start in range(0, len(stream) - packet_size + 1, packet_size):
        packet = stream[start:start + packet_size]
        if packet[2] != pid:
            continue
        if packet[1] & payload_unit_start:
            if chunks:
                # A new unit begins: the one being collected is complete.
                break
        elif not chunks:
            # Continuation of a unit whose start was not seen.
            continue
        chunks.append(packet[4:])
    return b''.join(chunks)
def get_url_ait(packet):
    """Extract the application URL (URL base + initial path) from AIT data.

    *packet* is the reassembled AIT payload. The first 12 + 10 bytes are
    skipped as fixed headers and the rest is walked as a list of
    tag/length/value descriptors:

    * tag 0x02 (transport protocol) with protocol id 0x0003 (HTTP): the URL
      base is read using the URL-base length byte, so URL extension bytes
      that may follow inside the descriptor are not included;
    * tag 0x15 (simple application location): the whole descriptor body is
      the initial path;
    * every other descriptor, including non-HTTP transport protocol
      descriptors, is skipped.

    NOTE(review): the fixed 22-byte skip presumably assumes an empty common
    descriptor loop and that the URL belongs to the first application in
    the table - confirm for streams that announce several applications.

    Args:
        packet: Bytes-like AIT payload as returned by ``get_ait_packet``.

    Returns:
        The URL base and the initial path, decoded as ASCII and joined.

    Raises:
        ValueError: If the data ends before both parts have been found.
        UnicodeDecodeError: If either part is not valid ASCII.
    """
    # Skip the table header (12 bytes) and the application header (10).
    data = packet[12 + 10:]

    # None marks "not seen yet"; an empty path is a legitimate value.
    domain = None
    path = None
    while domain is None or path is None:
        if len(data) < 2:
            raise ValueError(
                "AIT data ended before both the URL base and the path "
                "were found")
        tag = data[0]
        end = 2 + data[1]  # offset just past this descriptor
        if tag == 0x15:
            path = bytes(data[2:end])
        elif tag == 0x02 and len(data) >= 6 and data[2:4] == b'\x00\x03':
            # data[5] is the URL base length; never read past the
            # descriptor itself.
            domain = bytes(data[6:min(6 + data[5], end)])
        data = data[end:]
    return domain.decode('ascii') + path.decode('ascii')
def get_program_sid(pmt):
    """Return the byte at index 9 of the PMT packet *pmt*.

    The index is 4 (TS header) + 1 (pointer field) + 4 bytes into the
    section.

    NOTE(review): this is presumably the low byte of the 16-bit program
    number (service id); the high byte at index 8 is not returned, so
    services whose ids differ only in the high byte are not told apart.

    Args:
        pmt: Bytes-like TS packet holding the start of a PMT section.

    Returns:
        The integer value of ``pmt[9]``.

    Raises:
        IndexError: If *pmt* is shorter than 10 bytes.
    """
    return pmt[4 + 1 + 4]
def get_name_from_sdt(ts, pid):
    """Return the service name stored in the SDT for the given service.

    The first packet that ``get_packet`` finds for 0x11 is used. After
    skipping 4 + 9 + 4 bytes, the data is walked as a list of service
    entries. In each entry byte 0 is compared with *pid* and byte 3 is used
    as the size of the entry's descriptors, so the next entry starts
    ``5 + entry[3]`` bytes further on.

    In the matching entry byte 7 is the provider-name length; the
    service-name length follows the provider name and the service name
    follows that length byte.

    NOTE(review): byte 0 is presumably the low byte of the 16-bit service
    id and byte 3 the low byte of the descriptor-loop length; the first
    descriptor of the entry is assumed to be the service descriptor.
    Confirm for SDTs with long descriptor loops or several descriptors.

    Args:
        ts: Raw transport-stream contents (bytes-like).
        pid: Service identifier byte to look for (despite the name, this is
            compared with the service entry, not with a packet PID).

    Returns:
        The service name decoded as ASCII; bytes outside ASCII are replaced
        with U+FFFD instead of aborting the lookup.

    Raises:
        ValueError: If no SDT packet is found, the service is not listed in
            that packet, or its descriptor is truncated.
    """
    sdt = get_packet(ts, 0x11)
    if not sdt:
        # get_packet signals "not found" with 0.
        raise ValueError("no SDT packet (0x11) found in the stream")

    entries = sdt[4 + 9 + 4:]
    # Each step needs bytes 0 and 3 of the current entry.
    while len(entries) >= 4:
        if entries[0] != pid:
            entries = entries[5 + entries[3]:]
            continue
        try:
            provider_length = entries[7]
            name_length = entries[8 + provider_length]
        except IndexError:
            raise ValueError(
                "truncated service descriptor in the SDT packet") from None
        name_start = 8 + provider_length + 1
        name = entries[name_start:name_start + name_length]
        return name.decode('ascii', errors='replace')

    raise ValueError("service %r not found in the SDT packet" % (pid,))
def main():
    """Print "<service name>: <application URL>" for the given TS file.

    The file path is taken from the first command-line argument. The steps
    are: find the PAT (packet value 0), take the PMT PID byte from it, read
    the PMT, locate the AIT component, reassemble the AIT payload and
    extract the URL from it, then look up the service name in the SDT.

    Exits with an error message when the argument is missing or when the
    PAT, PMT or AIT component cannot be found.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: python get_hbbtv_url_from_ts.py <file.ts>")
    path = sys.argv[1]

    # get ts file
    print("Opening " + path)
    with open(path, "rb") as ts_file:
        ts = ts_file.read()

    # search pat packet
    # pat pid = 0; strip the packet header and pointer field, keep 16 bytes
    pat_packet = get_packet(ts, 0)
    if not pat_packet:
        sys.exit("No PAT packet found in " + path)
    pat = pat_packet[5:21]

    # get program map pid
    map_pid = get_pid_from_packet(pat)

    # search pmt packet
    pmt = get_packet(ts, map_pid)
    if not pmt:
        sys.exit("No PMT packet found in " + path)

    # get pmt programs
    programs_pmt = get_programs_pmt(pmt)

    # get ait pid program
    pid_ait = get_pid_ait_program(programs_pmt)
    if pid_ait is None:
        sys.exit("No AIT component announced in the PMT of " + path)

    # get ait packet
    ait_packet = get_ait_packet(ts, pid_ait)

    # program sid
    p_sid = get_program_sid(pmt)

    print(get_name_from_sdt(ts, p_sid) + ": " + get_url_ait(ait_packet))


if __name__ == "__main__":
    main()