# fix_certs.py: rewrite messy PEM/base64/hex blobs from an input file as normalized PEM blocks.
import base64
import re
import sys
from pathlib import Path
from textwrap import wrap
# PEM armor lines used both to recognise already-armored input and to wrap the
# normalized output.
# NOTE(review): the names say "CERT" but the values are RSA private-key
# markers; confirm which PEM label this tool is meant to read and emit.
PEM_CERT_BEGIN = "-----BEGIN RSA PRIVATE KEY-----"
PEM_CERT_END = "-----END RSA PRIVATE KEY-----"


def _decode_base64_lenient(text: str) -> bytes:
    """Decode *text* as base64 after dropping noise and repairing padding.

    Raises ValueError (binascii.Error is a subclass) if nothing decodable
    remains.
    """
    # Drop everything outside the base64 alphabet, including any existing '='
    # so that wrong or missing padding can be rebuilt from the data length.
    data = re.sub(r"[^A-Za-z0-9+/]", "", text)
    if not data:
        raise ValueError("No base64 data found.")
    data += "=" * (-len(data) % 4)
    return base64.b64decode(data, validate=True)


def _decode_hex(text: str) -> bytes:
    """Decode *text* as hex bytes, ignoring whitespace, ':' / ',' and '0x'.

    Raises ValueError unless what remains is a non-empty, even-length run of
    hex digits.
    """
    compact = re.sub(r"0[xX]|[\s:,]", "", text)
    if not re.fullmatch(r"(?:[0-9A-Fa-f]{2})+", compact):
        raise ValueError("Input is not a pure hex string.")
    return bytes.fromhex(compact)


def _der_to_pem(der: bytes) -> str:
    """Base64-encode *der*, wrap at 64 columns and add the PEM armor lines."""
    b64 = base64.b64encode(der).decode("ascii")
    wrapped = "\n".join(wrap(b64, 64))
    return f"{PEM_CERT_BEGIN}\n{wrapped}\n{PEM_CERT_END}\n"


def to_pem_certificate(cert_str: str) -> str:
    """
    Convert a 'messy' encoded blob into a single well-formed PEM block.

    Accepted inputs:
    - A PEM block delimited by PEM_CERT_BEGIN / PEM_CERT_END, possibly with
      broken line wrapping, stray whitespace or other noise in the payload.
    - A bare base64 blob (any line layout, missing or wrong '=' padding).
    - Hex-encoded bytes, optionally using whitespace, ':' or ',' separators
      and '0x' prefixes.

    The decoded bytes are re-encoded as base64, wrapped at 64 characters and
    enclosed in PEM_CERT_BEGIN / PEM_CERT_END. The bytes themselves are not
    parsed or validated as ASN.1.

    Returns the PEM text, terminated by a newline.
    Raises ValueError if the input cannot be decoded or decodes to nothing.
    """
    s = cert_str.strip()

    # Already armored: pull out the payload between the markers. The pattern is
    # built from the same constants as the membership test so that detection
    # and extraction can never disagree about which markers are in use.
    if PEM_CERT_BEGIN in s and PEM_CERT_END in s:
        pattern = (
            re.escape(PEM_CERT_BEGIN) + r"\s*(.*?)\s*" + re.escape(PEM_CERT_END)
        )
        m = re.search(pattern, s, flags=re.DOTALL)
        if not m:
            # Both markers are present but END precedes BEGIN.
            raise ValueError("Found PEM markers, but couldn't extract payload.")
        try:
            der = _decode_base64_lenient(m.group(1))
        except ValueError as exc:
            raise ValueError("PEM payload is not valid base64.") from exc
        return _der_to_pem(der)

    # Not armored. Hex must be tried first: every hex digit is also a legal
    # base64 character, so a hex dump would otherwise be "successfully"
    # base64-decoded into garbage. A real base64 blob of any size is expected
    # to contain characters outside 0-9/a-f, so it will not be mistaken for hex.
    try:
        der = _decode_hex(s)
    except ValueError:
        try:
            der = _decode_base64_lenient(s)
        except ValueError as exc:
            raise ValueError(
                "Input is not valid base64/hex/PEM for an X.509 certificate."
            ) from exc
    return _der_to_pem(der)
def extract_possible_chunks(content: str):
    """
    Split raw input text into pieces that may each hold one encoded blob.

    Strategy, in order:
    - If any complete BEGIN/END RSA PRIVATE KEY blocks are present, return
      exactly those blocks (markers included) and ignore everything else.
    - Otherwise split on blank lines and keep the stripped paragraphs that
      have more than 128 non-whitespace characters.
    - If nothing qualifies, return the whole input as a single chunk.
    """
    armored = re.findall(
        r"-----BEGIN RSA PRIVATE KEY-----.*?-----END RSA PRIVATE KEY-----",
        content,
        flags=re.DOTALL,
    )
    if armored:
        return armored

    # No armored blocks: treat blank-line-separated paragraphs as candidates
    # and keep only those long enough to plausibly be an encoded payload.
    candidates = []
    for paragraph in re.split(r"\n\s*\n", content):
        payload_length = len(re.sub(r"\s+", "", paragraph))
        if payload_length > 128:
            candidates.append(paragraph.strip())

    if candidates:
        return candidates
    return [content]
def main():
    """
    Command-line entry point: convert every chunk of an input file to PEM.

    Expects two arguments, the input path and the output path. Chunks that
    cannot be converted are reported and skipped. Exits with status 1 on bad
    usage and status 2 when no chunk could be converted.
    """
    if len(sys.argv) < 3:
        print("Usage: python fix_certs.py <input.txt> <output.pem>")
        sys.exit(1)

    source, target = Path(sys.argv[1]), Path(sys.argv[2])
    # Undecodable bytes are dropped rather than aborting the whole run.
    text = source.read_text(encoding="utf-8", errors="ignore")

    from_fix = []
    for candidate in extract_possible_chunks(text):
        try:
            pem_block = to_pem_certificate(candidate)
        except Exception as e:
            # Best effort: one bad chunk must not stop the remaining ones.
            print(f"[WARN] Skipping chunk (cannot convert): {e}")
        else:
            from_fix.append(pem_block)

    if not from_fix:
        print("No convertible certificate chunks found.")
        sys.exit(2)

    target.write_text("".join(from_fix), encoding="utf-8")
    print(f"Wrote {len(from_fix)} certificate(s) → {sys.argv[2]}")


if __name__ == "__main__":
    main()