# fix_certs

import base64
import re
import sys
from pathlib import Path
from textwrap import wrap

# PEM armor lines for an X.509 certificate (RFC 7468 label "CERTIFICATE").
PEM_CERT_BEGIN = "-----BEGIN CERTIFICATE-----"
PEM_CERT_END = "-----END CERTIFICATE-----"


def _decode_hex(text: str) -> bytes:
    """Return *text* decoded as hex, or b"" if it is not a pure hex dump.

    Whitespace and ':' separators are ignored; any other non-hex character
    means the text is not treated as hex.
    """
    compact = re.sub(r"[\s:]", "", text)
    if len(compact) % 2 or not re.fullmatch(r"[0-9A-Fa-f]+", compact):
        return b""
    return bytes.fromhex(compact)


def _decode_base64(text: str) -> bytes:
    """Return *text* decoded as base64, or b"" if it cannot be decoded.

    Characters outside the base64 alphabet are dropped first, and missing
    '=' padding is tolerated.
    """
    cleaned = re.sub(r"[^A-Za-z0-9+/=]", "", text)
    if not cleaned:
        return b""
    try:
        return base64.b64decode(cleaned, validate=True)
    except ValueError:  # binascii.Error is a ValueError subclass
        pass
    # Forgiving retry: top the length up to a multiple of four with '='.
    padded = cleaned + "=" * (-len(cleaned) % 4)
    try:
        return base64.b64decode(padded)
    except ValueError:
        return b""


def to_pem_certificate(cert_str: str) -> str:
    """
    Convert a 'messy' certificate string into a well-formed PEM certificate.

    Handles:
      - PEM with/without headers (including a block missing one marker)
      - Base64 blob on one line, with or without '=' padding
      - Hex-encoded DER (hex digits, optionally separated by whitespace or ':')
      - Extra whitespace / non-base64 noise

    Args:
        cert_str: Text containing one certificate in any of the forms above.
            If several PEM blocks are present, only the first is converted.

    Returns:
        A single PEM certificate block: the BEGIN line, the base64 payload
        wrapped at 64 characters, the END line, and a trailing newline.

    Raises:
        ValueError: If both PEM markers are present but no payload can be
            extracted between them, or if the input does not decode to
            DER data starting with a SEQUENCE tag.
    """
    text = cert_str.strip()

    if PEM_CERT_BEGIN in text and PEM_CERT_END in text:
        # Build the pattern from the same constants as the membership test so
        # the check and the extraction can never disagree.
        match = re.search(
            re.escape(PEM_CERT_BEGIN) + r"\s*(.*?)\s*" + re.escape(PEM_CERT_END),
            text,
            flags=re.DOTALL,
        )
        if not match:
            raise ValueError("Found PEM markers, but couldn't extract payload.")
        payload = match.group(1)
    else:
        # A lone marker (truncated block) must not leak its letters into the
        # base64 payload.
        payload = text.replace(PEM_CERT_BEGIN, "").replace(PEM_CERT_END, "")

    # Hex is tried first: every hex digit is also a valid base64 character, so
    # a hex dump would otherwise "decode" as base64 into garbage.
    der = _decode_hex(payload) or _decode_base64(payload)

    # A DER-encoded certificate is an ASN.1 SEQUENCE, so its first byte is 0x30.
    # This also rejects empty payloads and text that merely happened to decode.
    if not der.startswith(b"\x30"):
        raise ValueError("Input is not valid base64/hex/PEM for an X.509 certificate.")

    # Convert DER → PEM
    b64 = base64.b64encode(der).decode("ascii")
    wrapped = "\n".join(wrap(b64, 64))
    return f"{PEM_CERT_BEGIN}\n{wrapped}\n{PEM_CERT_END}\n"


def extract_possible_chunks(content: str) -> list:
    """
    Attempt to split the input into certificate-like chunks.

    Strategy:
      - If the text contains complete PEM blocks (delimited by PEM_CERT_BEGIN
        and PEM_CERT_END), return exactly those blocks, markers included.
      - Otherwise split on blank lines and keep the parts that are large
        enough to plausibly be a base64/hex blob.

    Args:
        content: Raw text that may hold one or more certificates.

    Returns:
        A list of candidate strings. When nothing looks like a payload the
        whole input is returned as a single chunk, so the list is never empty.
    """
    # Derive the pattern from the module-level markers instead of repeating
    # them as a literal, so this function cannot drift out of sync with them.
    block_pattern = re.escape(PEM_CERT_BEGIN) + r".*?" + re.escape(PEM_CERT_END)
    pem_blocks = re.findall(block_pattern, content, flags=re.DOTALL)
    if pem_blocks:
        return pem_blocks

    # Otherwise, split by blank lines and big blobs
    parts = re.split(r"\n\s*\n", content)
    # Heuristic: keep parts with more than 128 non-whitespace characters.
    chunks = [p.strip() for p in parts if len(re.sub(r"\s+", "", p)) > 128]
    return chunks if chunks else [content]

def main():
    """
    Command-line entry point.

    Reads the input file named by the first argument, converts every chunk
    that can be turned into a PEM block, and writes the concatenated blocks
    to the file named by the second argument.

    Exits with status 1 when fewer than two arguments are given and with
    status 2 when no chunk could be converted.
    """
    # Both an input path and an output path are required.
    if not len(sys.argv) >= 3:
        print("Usage: python fix_certs.py <input.txt> <output.pem>")
        sys.exit(1)

    source_path = Path(sys.argv[1])
    raw_text = source_path.read_text(encoding="utf-8", errors="ignore")

    from_fix = []
    for candidate in extract_possible_chunks(raw_text):
        try:
            converted = to_pem_certificate(candidate)
        except Exception as e:
            # Best effort: report the failing chunk and carry on with the rest.
            print(f"[WARN] Skipping chunk (cannot convert): {e}")
        else:
            from_fix.append(converted)

    if len(from_fix) == 0:
        print("No convertible certificate chunks found.")
        sys.exit(2)

    destination = Path(sys.argv[2])
    destination.write_text("".join(from_fix), encoding="utf-8")
    print(f"Wrote {len(from_fix)} certificate(s) → {sys.argv[2]}")

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()