Introduction
Webpack is a module bundler for modern JavaScript applications. Its main function is to take modules with dependencies and generate static assets that represent those modules. Webpack can handle a variety of file types and bundle them into a single file or several files that are more efficient to serve in a web application.
Source maps are files that map compressed or transformed code (such as the output produced by Webpack after compilation and minification) back to its original source code. These files are extremely useful for debugging, since they allow developers to see and work with the original code in the browser, even though the code actually running has been transformed.
Reverse engineering the frontend of a web application that has been bundled and minified with Webpack is difficult, because the resulting code is extremely different from the original code. In some cases, due to a misconfiguration of Webpack, the source map files are generated and uploaded to the production server of the web application, which allows complete recovery of the frontend source code. A Python application has been created that, starting from a URL, scans the web page for JavaScript files and checks whether the corresponding .map file can be downloaded. If it can, the application downloads it and extracts the original source code.
UnWebPack usage
Running the Python application requires two arguments: first, the URL of the website to scan, and second, the local directory where the files will be saved (the directory path should end with a path separator, as in the example). This is an example of its execution:
python .\unwebpack.py 'https://webpage.com' 'C:\\files\\'
Source code
import json
import os
import re
import sys
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup
def get_request(url, timeout=30):
    """Fetch *url* with an HTTP GET, sending a desktop Firefox User-Agent.

    Args:
        url: Absolute URL to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a single
            unresponsive host would hang the whole scan.

    Returns:
        The ``requests.Response`` object for the request.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0'
    }
    return requests.get(url, headers=headers, timeout=timeout)
def get_source_code_urls(html_code, base_url=None):
    """Collect the site-relative JavaScript URLs referenced by an HTML page.

    Every ``<link>`` and ``<script>`` element is inspected; its ``href``
    attribute is used when present, otherwise its ``src`` attribute.
    References that point to another location (anything carrying a scheme
    or a host, including protocol-relative ``//host/...`` references) are
    ignored. The remaining references are resolved against *base_url* and
    kept when the resulting URL ends in ``.js``.

    Args:
        html_code: HTML document text to parse (parsed with the ``lxml``
            parser of BeautifulSoup).
        base_url: URL of the page the HTML came from, used to resolve
            relative references. Defaults to the module-level ``WEB_URL``.

    Returns:
        A list of absolute script URLs, without duplicates, in the order
        they were found (``<link>`` elements first, then ``<script>``).
    """
    if base_url is None:
        base_url = WEB_URL

    soup = BeautifulSoup(html_code, 'lxml')
    elements = []
    elements.extend(soup.find_all('link'))
    elements.extend(soup.find_all('script'))

    links = []
    seen = set()
    for elem in elements:
        if 'href' in elem.attrs:
            reference = elem.get('href')
        elif 'src' in elem.attrs:
            reference = elem.get('src')
        else:
            continue
        if not reference:
            continue
        reference = reference.strip()

        try:
            parsed = urlparse(reference)
            # Ignore non-relative resources: absolute URLs, protocol-relative
            # URLs and pseudo-schemes such as "data:" or "javascript:".
            if parsed.scheme or parsed.netloc:
                continue
            # urljoin handles both "/static/app.js" and "static/app.js";
            # plain string concatenation mangles the second form.
            url = urljoin(base_url, reference)
        except ValueError:
            # Malformed reference in the page (e.g. a broken IPv6 literal).
            continue

        if url.endswith('.js') and url not in seen:
            seen.add(url)
            links.append(url)
    return links
def get_source_code_map_urls(source_urls):
    """Find the source map URL announced by each JavaScript file.

    Each script is downloaded and searched for a ``sourceMappingURL``
    comment (``//# ...`` or the legacy ``//@ ...`` form). The referenced
    location is resolved relative to the script's own URL, so maps whose
    name or directory differs from ``<script>.map`` are still found.

    Args:
        source_urls: Iterable of absolute script URLs.

    Returns:
        A list of absolute source map URLs, one per script that references
        a downloadable map. Scripts without such a reference are reported
        on standard output and skipped.
    """
    pattern = re.compile(r'//[#@]\s*sourceMappingURL=(\S+)')
    links = []
    for source_url in source_urls:
        javascript_request = get_request(source_url)
        references = pattern.findall(javascript_request.text)
        # The comment normally sits at the very end of the bundle, so the last
        # occurrence is used in case the string also appears inside the code.
        reference = references[-1] if references else ''
        # Inline maps ("data:" URIs) cannot be fetched as a separate file.
        if reference and not reference.startswith('data:'):
            links.append(urljoin(source_url, reference))
        else:
            print(source_url + ' has no map file')
    return links
def get_source_code_maps(map_urls):
    """Download and parse the source maps found at *map_urls*.

    Args:
        map_urls: Iterable of absolute source map URLs.

    Returns:
        A list with the parsed JSON object (a ``dict``) of every map that
        could be loaded. Responses that are not valid JSON, or whose JSON is
        not an object, are reported on standard output and skipped.
    """
    maps = []
    for map_url in map_urls:
        map_request = get_request(map_url)
        try:
            map_json = json.loads(map_request.text)
        except ValueError:
            # json.JSONDecodeError is a ValueError; catching only this keeps
            # KeyboardInterrupt / SystemExit working, unlike a bare except.
            map_json = None
        # A source map must be a JSON object; anything else (for example an
        # HTML error page or a bare list) cannot be processed later on.
        if not isinstance(map_json, dict):
            print(map_url + ' error loading map file')
            continue
        maps.append(map_json)
    return maps
def get_path_level(path):
    """Return the output sub-directory name for a webpack source path.

    Args:
        path: Source path with the ``webpack://`` prefix already removed,
            e.g. ``./src/index.js`` or ``../../node_modules/x/index.js``.

    Returns:
        ``'webpack'`` for paths starting with ``(webpack)``;
        ``'level_N'`` for paths starting with N consecutive ``../``
        segments; ``'level_1'`` for everything else (``./`` paths and
        paths without a relative prefix).

    Note:
        ``./x`` and ``../x`` both map to ``'level_1'``.
    """
    # (webpack) path
    if path.startswith('(webpack)'):
        return 'webpack'
    # ../../ ... path: only the leading run of "../" defines the level, so a
    # "../" that appears later in the path is not counted.
    leading_parents = re.match(r'(?:\.\./)+', path)
    if leading_parents:
        return 'level_' + str(len(leading_parents.group(0)) // len('../'))
    # ./ path and any other path
    return 'level_1'
def save_map_files(map):
    """Write every source embedded in a parsed source map to disk.

    Each entry of ``map['sources']`` is paired with the entry at the same
    index of ``map['sourcesContent']`` and written below the module-level
    ``DOWNLOAD_PATH``, inside the sub-directory chosen by
    ``get_path_level``. Entries without embedded content are reported on
    standard output and skipped.

    The source map comes from a remote server, so its paths are treated as
    untrusted: empty, ``.`` and ``..`` segments are dropped so that no file
    can be written outside the download directory.

    Args:
        map: Parsed source map (a ``dict``). The parameter name shadows the
            ``map`` builtin; it is kept for compatibility with callers.
    """
    sources = map.get('sources') or []
    contents = map.get('sourcesContent') or []
    # On Windows both slash styles separate directories; elsewhere only "/".
    separator_pattern = r'[\\/]' if os.name == 'nt' else '/'

    for index, source in enumerate(sources):
        # "sourcesContent" may be absent, shorter than "sources", or hold null.
        content = contents[index] if index < len(contents) else None
        if not isinstance(source, str) or not isinstance(content, str):
            print(str(source) + ' has no source content')
            continue

        # Remove the "webpack://<namespace>/" prefix (the namespace may be
        # empty, giving "webpack:///"); paths without it are used as they are.
        path = re.sub(r'^webpack://[^/]*/', '', source)
        # Get the level of the path (subdirectories)
        path_level = get_path_level(path)

        # Remove the "./", "../" and "(webpack)/" parts
        relative = re.sub(r'(\.|\.\.|\(webpack\))/', '', path)
        # NOTE(review): this removes the literal text ^\.\\.*$ (a plain
        # substring replace, not a regex) - confirm that this is intended.
        relative = relative.replace('^\\.\\\\.*$', '')
        # Replace invalid characters with "-" character
        relative = re.sub(r'[*|?<">]', '-', relative)

        # Stripping "../" once can leave a new "../" behind (for example
        # "....//"), so the traversal segments are filtered out explicitly.
        segments = [
            segment
            for segment in re.split(separator_pattern, relative)
            if segment not in ('', '.', '..')
        ]
        if not segments:
            print(source + ' has no usable file name')
            continue

        # Untrusted segments are joined with os.sep rather than os.path.join,
        # which would restart from an absolute or drive-qualified segment.
        level_directory = os.path.join(DOWNLOAD_PATH, path_level)
        file_path = os.sep.join([level_directory] + segments)

        # Create the parent directory
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        # Write the source code file
        with open(file_path, 'w', encoding='utf-8') as source_file:
            source_file.write(content)
def process_webpack_application(web_url):
    """Run the whole recovery pipeline for the page at *web_url*.

    Downloads the page, collects its script URLs, looks up the source map
    of each script, downloads the maps and saves the sources they embed.

    Args:
        web_url: URL of the web page to scan.
    """
    page = get_request(web_url)
    script_urls = get_source_code_urls(page.text)
    map_urls = get_source_code_map_urls(script_urls)
    for source_map in get_source_code_maps(map_urls):
        save_map_files(source_map)
if __name__ == '__main__':
    # Usage: python unwebpack.py <url> <download directory>
    if len(sys.argv) < 3:
        print('Usage: python ' + sys.argv[0] + ' <url> <download directory>')
        sys.exit(1)
    # WEB_URL and DOWNLOAD_PATH are module-level globals read by the functions
    # of this file, so they must be assigned before the scan starts.
    WEB_URL = sys.argv[1]
    # Joining with '' appends a trailing path separator only when it is
    # missing, so the directory can be given with or without one.
    DOWNLOAD_PATH = os.path.join(sys.argv[2], '')
    process_webpack_application(WEB_URL)