# DirtyScripts / analyse_postman.py
import json
import sys
import re
from collections import defaultdict


# Three dot-separated base64url-style segments, the first starting with "eyJ".
JWT_REGEX = re.compile(
    r"eyJ[\w-]{10,}\.[\w-]{10,}\.[\w-]{10,}"
)

# Lower-case fragments; a key is treated as sensitive when any of these
# occurs as a substring of its lower-cased name.
SENSITIVE_NAMES = {
    "token",
    "access_token",
    "refresh_token",
    "id_token",
    "password",
    "passwd",
    "secret",
    "client_secret",
    "credential",
    "credentials",
    "api_key",
    "apikey",
    "authorization",
    "auth",
    "x-api-key",
    "signature",
    "jwt",
    "bearer",
    "session",
}


# Key names that are reported under the generic label "bearer" instead of
# under their own name.
ONLY_BEARER = {
    "token",
    "value",
    "string",
}


def mask_value(val: str, head: int = 4, tail: int = 4) -> str:
    """Return *val* with its middle replaced by ``***``.

    Args:
        val: Value to mask; it is converted with ``str()`` first.
        head: Number of leading characters to keep. Negative values are
            treated as 0.
        tail: Number of trailing characters to keep. Negative values are
            treated as 0.

    Returns:
        ``"***"`` when the text is no longer than ``head + tail`` characters,
        otherwise the kept prefix, ``***`` and the kept suffix.
    """
    s = str(val)
    # Negative counts would turn the slices below into "everything except
    # the last/first N characters" and expose most of the value.
    head = max(head, 0)
    tail = max(tail, 0)
    if len(s) <= head + tail:
        return "***"
    # s[-0:] is the whole string, so compute the suffix start explicitly to
    # make tail=0 keep no trailing characters.
    suffix = s[len(s) - tail:] if tail else ""
    return f"{s[:head]}***{suffix}"


def looks_like_secret(val: str) -> bool:
    """Heuristically decide whether *val* has the shape of a credential.

    A string is reported as a secret when it contains a JWT-like token,
    starts (ignoring case and surrounding whitespace) with ``bearer `` or
    ``basic ``, or is at least 32 alphanumeric characters long once ``-``,
    ``_`` and ``=`` have been removed. Non-string input is never a secret.
    """
    if not isinstance(val, str):
        return False

    if JWT_REGEX.search(val) is not None:
        return True

    normalised = val.strip().lower()
    if normalised.startswith(("bearer ", "basic ")):
        return True

    # Drop the separator/padding characters commonly found in keys before
    # checking for a long alphanumeric run.
    compact = "".join(ch for ch in val if ch not in "-_=")
    return len(compact) >= 32 and compact.isalnum()


def analyse_postman_collection(file_path):
    """Print a per-endpoint summary of a Postman collection JSON file.

    The collection's ``item`` tree is walked recursively. For every request
    the endpoint (URL path), HTTP method, query parameters, headers, auth
    settings and body are inspected, and names of fields that look like
    credentials are collected. A table with one row per endpoint and a
    totals line are written to standard output.

    Args:
        file_path: Path of the UTF-8 encoded collection JSON file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        collection = json.load(f)

    # endpoint -> counters and sets accumulated over every request seen.
    stats = defaultdict(lambda: {
        "GET_params": 0,
        "POST_params": 0,
        "methods": set(),
        "secret_names": set(),
    })

    def flag_kv(endpoint: str, key_name, value):
        """Record key_name for endpoint when the name or value looks secret."""
        key_l = str(key_name).lower() if key_name is not None else ""
        val_s = str(value) if value is not None else ""
        name_is_sensitive = any(n in key_l for n in SENSITIVE_NAMES)
        value_is_secret = looks_like_secret(val_s)
        if not (name_is_sensitive or value_is_secret):
            return
        if key_l in ONLY_BEARER or value_is_secret:
            label = "bearer"
        else:
            # Store text so unhashable or mixed-type keys cannot break the
            # set, or the sort/join done when printing.
            label = str(key_name)
        stats[endpoint]["secret_names"].add(label)

    def scan_pairs(endpoint: str, entries, skip_files: bool = False):
        """Flag every ``{"key": ..., "value": ...}`` dict in a list."""
        if not isinstance(entries, list):
            return
        for entry in entries:
            if not isinstance(entry, dict):
                continue  # malformed entry: nothing to inspect
            if skip_files and entry.get('type') == 'file':
                continue
            flag_kv(endpoint, entry.get('key'), entry.get('value'))

    def analyse_auth(endpoint: str, auth_obj):
        """Flag the parameters listed under the auth object's active type."""
        if not isinstance(auth_obj, dict):
            return
        a_type = auth_obj.get('type')
        if a_type and isinstance(a_type, str):
            scan_pairs(endpoint, auth_obj.get(a_type))

    def scan_headers(endpoint: str, headers):
        """Flag request headers; the name is read from 'key' or 'name'."""
        if not isinstance(headers, list):
            return
        for h in headers:
            if not isinstance(h, dict):
                continue
            flag_kv(endpoint, h.get('key') or h.get('name'), h.get('value'))

    def scan_queries(endpoint: str, queries):
        """Flag URL query parameters."""
        scan_pairs(endpoint, queries)

    def scan_body(endpoint: str, body):
        """Flag fields of a urlencoded, formdata or raw request body."""
        if not isinstance(body, dict):
            return
        mode = body.get('mode')
        if mode == 'urlencoded':
            scan_pairs(endpoint, body.get('urlencoded', []))
        elif mode == 'formdata':
            # File parts carry no inline value worth inspecting.
            scan_pairs(endpoint, body.get('formdata', []), skip_files=True)
        elif mode == 'raw':
            raw = body.get('raw')
            if not isinstance(raw, str):
                return
            # Only the parse is guarded, so a failure while scanning the
            # parsed document is not mistaken for "body is not JSON".
            try:
                parsed = json.loads(raw)
            except (ValueError, RecursionError):
                if looks_like_secret(raw):
                    flag_kv(endpoint, 'raw', raw)
            else:
                scan_structure(endpoint, parsed)

    def scan_structure(endpoint: str, data):
        """Recursively flag keys and string values in nested JSON data."""
        if isinstance(data, dict):
            if set(data.keys()) >= {"key", "value"}:
                flag_kv(endpoint, data.get('key'), data.get('value'))
            for k, v in data.items():
                # Non-string values are judged by key name only here; their
                # contents are handled by the recursive call.
                flag_kv(endpoint, k, v if isinstance(v, str) else None)
                scan_structure(endpoint, v)
        elif isinstance(data, list):
            for v in data:
                scan_structure(endpoint, v)
        elif isinstance(data, str):
            if looks_like_secret(data):
                flag_kv(endpoint, None, data)

    def segment_text(segment):
        """Return the text of one URL path segment."""
        if isinstance(segment, dict):
            # Object-form segment: use its 'value' field.
            value = segment.get('value')
            return "" if value is None else str(value)
        return "" if segment is None else str(segment)

    def build_endpoint(url_obj):
        """Return '/<path>' for a URL object, or the text of any other value."""
        if not isinstance(url_obj, dict):
            return str(url_obj)
        raw_path = url_obj.get('path') or []
        if isinstance(raw_path, str):
            # Joining a string would put '/' between its characters.
            segments = [raw_path]
        elif isinstance(raw_path, list):
            segments = [segment_text(s) for s in raw_path]
        else:
            segments = [str(raw_path)]
        path = "/".join(segments).strip('/')
        return f"/{path}" if path else "/"

    def analyse_request(request, current_auth):
        """Update the stats with one request dict."""
        method = request.get('method', 'UNKNOWN')
        method = 'UNKNOWN' if method is None else str(method)
        url = request.get('url', {})
        endpoint = build_endpoint(url)
        entry = stats[endpoint]
        entry["methods"].add(method)

        queries = url.get('query') if isinstance(url, dict) else None
        if not isinstance(queries, list):
            queries = []
        if method == 'GET':
            entry["GET_params"] += len(queries)
        elif method == 'POST':
            # NOTE(review): this counts POST requests, not parameters,
            # despite the key name - confirm the intended meaning.
            entry["POST_params"] += 1

        scan_queries(endpoint, queries)
        scan_headers(endpoint, request.get('header', []))
        # Auth set on the request wins over auth inherited from a parent.
        analyse_auth(endpoint, request.get('auth', current_auth))
        scan_body(endpoint, request.get('body', {}))
        scan_structure(endpoint, request)

    def extract_items(items, inherited_auth=None):
        """Walk items recursively, passing auth down to nested items."""
        if not isinstance(items, list):
            return
        for item in items:
            if not isinstance(item, dict):
                continue  # skip null or otherwise malformed entries
            current_auth = item.get('auth', inherited_auth)
            if 'request' in item:
                request = item['request']
                if isinstance(request, str):
                    # A bare string is taken as the URL of a GET request.
                    request = {"method": "GET", "url": request}
                if isinstance(request, dict):
                    analyse_request(request, current_auth)
            extract_items(item.get('item'), current_auth)

    extract_items(collection.get('item'), collection.get('auth'))

    header = "Endpoint                               | GET | POST | Methods       | Secrets"
    print(header)
    print("-" * len(header))

    total_get = 0
    total_post = 0

    for endpoint in sorted(stats):
        data = stats[endpoint]
        methods = ",".join(sorted(data['methods'])) if data['methods'] else "-"
        secret_names = sorted(data['secret_names'])
        if not secret_names:
            secrets_cell = f"{len(secret_names)}"
        else:
            # Show at most four names; an ellipsis marks a truncated list.
            closing = "]" if len(secret_names) <= 4 else ", …]"
            secrets_cell = f"{len(secret_names)} [" + ", ".join(secret_names[:4]) + closing
        print(f"{endpoint:<38} | {data['GET_params']:^3} | {data['POST_params']:^4} | {methods:<13} | {secrets_cell}")
        total_get += data['GET_params']
        total_post += data['POST_params']

    # Every analysed request creates its endpoint's entry in stats, so the
    # number of keys is the number of unique endpoints.
    unique_endpoints = len(stats)

    print(f"Total unique endpoints: {unique_endpoints} (GET: {total_get} POST: {total_post})")


if __name__ == "__main__":
    # Exactly one positional argument is expected: the collection file path.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python analyse_postman.py <postman_collection.json>")
        sys.exit(1)

    analyse_postman_collection(cli_args[0])