Introduction

Webpack is a module bundler for modern JavaScript applications. Its main function is to take modules with dependencies and generate static assets that represent those modules. Webpack can process many kinds of files and bundle them into one or more files that are more efficient to serve in a web application.

Source maps are files that map compressed or transformed code (such as the output of compilation and minification with Webpack) back to its original source code. These files are extremely useful for debugging, since they allow developers to see and work with the original code in the browser even though the code actually running has been transformed.

Reverse engineering the frontend of a web application bundled with Webpack is difficult, because the resulting code is very different from the original source code. In some cases, however, a misconfigured Webpack build generates source map files that end up on the production server of the web application, which makes it possible to recover the complete frontend source code. The Python application presented here starts from a link, scans the web page for JavaScript files, and checks whether the corresponding .map file can be downloaded. If it can, the application downloads it and extracts the original source code.

UnWebPack usage

Running the Python application requires two arguments: first the URL of the website to scan, and second the local directory where the extracted files will be saved. This is an example of its execution:

python .\unwebpack.py 'https://webpage.com' 'C:\\files\\'

Source code

from bs4 import BeautifulSoup
import json
import os
import re
import requests
import sys

def get_request(url, timeout=30):
    """Send an HTTP GET request to *url* with a browser-like User-Agent.

    Args:
        url: URL to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one unresponsive
            host would stall the whole scan.

    Returns:
        The response object returned by ``requests.get``.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            timeout expires.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0'
    }
    return requests.get(url, headers=headers, timeout=timeout)

def get_source_code_urls(html_code):
    """Collect the same-site JavaScript URLs referenced by an HTML page.

    Every ``<link>`` and ``<script>`` element is inspected; its ``href``
    (or, when there is no ``href`` attribute, its ``src``) is resolved
    against the module-level ``WEB_URL``.

    Args:
        html_code: HTML document text to parse.

    Returns:
        A list of absolute URLs ending in ``.js``, without duplicates, in
        the order they were found (``<link>`` elements first).
    """
    # Local import so this function does not depend on the file's import block.
    from urllib.parse import urljoin, urlsplit

    bs = BeautifulSoup(html_code, 'lxml')
    elements = []
    elements.extend(bs.find_all('link'))
    elements.extend(bs.find_all('script'))
    links = []
    for elem in elements:
        if 'href' in elem.attrs:
            reference = elem.get('href')
        elif 'src' in elem.attrs:
            reference = elem.get('src')
        else:
            continue
        if not reference:
            continue
        try:
            parts = urlsplit(reference)
        except ValueError:
            # Malformed reference (e.g. a broken IPv6 host); nothing to fetch.
            continue
        # Ignore resources that are not relative to the scanned site: anything
        # carrying its own scheme (https:, data:, ...) or host (//cdn.example).
        if parts.scheme or parts.netloc:
            continue
        # urljoin handles both "/static/app.js" and "static/app.js"; gluing the
        # reference onto the host would turn the latter into an invalid URL.
        url = urljoin(WEB_URL, reference)
        if url.endswith('.js') and url not in links:
            links.append(url)
    return links

def get_source_code_map_urls(source_urls):
    """Return the source map URLs of the scripts that declare one.

    Each script is downloaded with ``get_request`` and searched for a
    ``//# sourceMappingURL=....js.map`` comment. When the comment is
    present, the map URL is taken to be the script URL plus ``.map``.
    Scripts without the comment are reported on stdout.

    Args:
        source_urls: Iterable of JavaScript file URLs.

    Returns:
        A list of ``<script url>.map`` URLs, in input order.
    """
    # Raw string with escaped dots so that only a literal ".js.map" matches.
    map_comment = re.compile(r'//# sourceMappingURL=.*\.js\.map')
    links = []
    for source_url in source_urls:
        javascript_request = get_request(source_url)
        if map_comment.search(javascript_request.text):
            links.append(source_url + '.map')
        else:
            print(source_url + ' has no map file')
    return links

def get_source_code_maps(map_urls):
    """Download and parse the source maps found at *map_urls*.

    Args:
        map_urls: Iterable of source map URLs.

    Returns:
        A list with the JSON-decoded content of every map that could be
        parsed. Maps whose body is not valid JSON are reported on stdout
        and skipped.
    """
    maps = []
    for map_url in map_urls:
        map_request = get_request(map_url)
        try:
            map_json = json.loads(map_request.text)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass. Catching only it
            # keeps Ctrl-C and real programming errors from being swallowed.
            print(map_url + ' error loading map file')
            continue
        maps.append(map_json)
    return maps

def get_path_level(path):
    """Classify a source path by how far it climbs above the project root.

    Args:
        path: Source path taken from a source map, already stripped of its
            ``webpack://`` prefix.

    Returns:
        ``'level_N'`` where N is the number of leading ``../`` components,
        ``'webpack'`` for paths starting with ``(webpack)``, and
        ``'level_1'`` for everything else (including ``./`` paths).
    """
    # Only the leading run of "../" tells how far the path climbs; a "../"
    # appearing later in the path must not inflate the level.
    leading_parents = re.match(r'(?:\.\./)+', path)
    if leading_parents:
        return 'level_' + str(len(leading_parents.group(0)) // 3)
    if path.startswith('(webpack)'):
        return 'webpack'
    return 'level_1'

def save_map_files(map):
    """Write every source embedded in a source map below ``DOWNLOAD_PATH``.

    Each entry of ``map['sources']`` is paired with the entry of
    ``map['sourcesContent']`` at the same index and written to
    ``DOWNLOAD_PATH/<level>/<relative path>``, where ``<level>`` comes from
    ``get_path_level``. Entries without text content, or without a usable
    file name, are reported on stdout and skipped.

    Args:
        map: Decoded source map (a dict). The parameter name shadows the
            builtin ``map``; it is kept so existing callers keep working.

    Raises:
        OSError: If a directory or file cannot be created.
    """
    sources = map.get('sources') or []
    contents = map.get('sourcesContent') or []
    for index, source in enumerate(sources):
        content = contents[index] if index < len(contents) else None
        # Either list may hold null entries, and sourcesContent may be
        # shorter than sources or absent altogether.
        if not isinstance(source, str) or not isinstance(content, str):
            print(str(source) + ' has no embedded source content')
            continue
        # Remove the webpack:// scheme and its optional namespace
        # ("webpack:///x" as well as "webpack://app/x").
        path = re.sub(r'^webpack://[^/]*/', '', source)
        if os.name == 'nt':
            # Backslashes are separators on Windows; normalise them so the
            # segment filter below sees every component.
            path = path.replace('\\', '/')
        # Get the level of the path (subdirectories)
        path_level = get_path_level(path)
        # Drop empty, ".", ".." and "(webpack)" components so the file always
        # stays below the download directory, and replace characters that are
        # invalid in file names with "-".
        segments = [
            re.sub(r'[*|?<">]', '-', segment)
            for segment in path.split('/')
            if segment not in ('', '.', '..', '(webpack)')
        ]
        if not segments:
            print(source + ' has no usable file name')
            continue
        # The relative part is appended as plain text: os.path.join would
        # restart the path at a component that looks like a drive letter.
        file_path = os.path.join(DOWNLOAD_PATH, path_level)
        file_path += os.sep + os.sep.join(segments)
        # Create the parent directory
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        # Write the source code file
        with open(file_path, 'w', encoding='utf-8') as source_file:
            source_file.write(content)

def process_webpack_application(web_url):
    """Run the whole extraction pipeline for the page at *web_url*.

    The page is downloaded, its JavaScript URLs are collected, the source
    maps of those scripts are fetched and parsed, and the sources embedded
    in each map are written to disk with ``save_map_files``.
    """
    page = get_request(web_url)
    script_urls = get_source_code_urls(page.text)
    map_urls = get_source_code_map_urls(script_urls)
    for source_map in get_source_code_maps(map_urls):
        save_map_files(source_map)

# Command-line entry point: the first argument is the URL of the website to
# scan, the second is the local directory where the sources are saved.
if len(sys.argv) < 3:
    # Exit with a usage message instead of an IndexError traceback.
    sys.exit('Usage: python unwebpack.py <url> <download directory>')

# Both values are read as module-level globals by the functions above.
WEB_URL = sys.argv[1]
DOWNLOAD_PATH = sys.argv[2]

process_webpack_application(WEB_URL)