Security Controls

docsfy implements layered controls around repository input, filesystem access, rendered HTML, and operational auditability.

SSRF checks

/api/generate applies two validation layers before cloning remote repositories:

  1. Schema-level URL validation (GenerateRequest) limits accepted formats to Git-style HTTP(S) and SSH URLs (http(s)://host/owner/repo[.git] or git@host:owner/repo[.git]).
  2. Runtime SSRF guard (_reject_private_url) blocks localhost/private targets, including DNS names that resolve to private IPs.
# src/docsfy/models.py
@field_validator("repo_url")
@classmethod
def validate_repo_url(cls, v: str | None) -> str | None:
    if v is None:
        return v
    https_pattern = r"^https?://[\w.\-]+/[\w.\-]+/[\w.\-]+(\.git)?$"
    ssh_pattern = r"^git@[\w.\-]+:[\w.\-]+/[\w.\-]+(\.git)?$"
    if not re.match(https_pattern, v) and not re.match(ssh_pattern, v):
        msg = f"Invalid git repository URL: '{v}'"
        raise ValueError(msg)
    return v
# src/docsfy/main.py
if gen_request.repo_url:
    await _reject_private_url(gen_request.repo_url)

# ...
if hostname in ("localhost", "127.0.0.1", "::1", "0.0.0.0"):
    raise HTTPException(
        status_code=400,
        detail="Repository URL must not target localhost or private networks",
    )

# Check if hostname is an IP address in private range
try:
    addr = ipaddress.ip_address(hostname)
    if not addr.is_global:
        raise HTTPException(
            status_code=400,
            detail="Repository URL must not target localhost or private networks",
        )
except ValueError:
    # hostname is a DNS name - resolve and check
    resolved = await loop.run_in_executor(
        None, socket.getaddrinfo, hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM
    )
    for _family, _socktype, _proto, _canonname, sockaddr in resolved:
        ip_str = sockaddr[0]
        addr = ipaddress.ip_address(ip_str)
        if not addr.is_global:
            raise HTTPException(
                status_code=400,
                detail="Repository URL resolves to a private network address",
            )

Test coverage includes explicit SSRF assertions:

# tests/test_main.py
with pytest.raises(HTTPException) as exc_info:
    await _reject_private_url("https://evil.com/org/repo")
assert exc_info.value.status_code == 400

response = await client.post(
    "/api/generate",
    json={"repo_url": "https://localhost/org/repo.git"},
)
assert response.status_code in (400, 422)

Note: _reject_private_url is intentionally described in-code as basic SSRF mitigation; deeper controls (for example, DNS rebinding defenses) are expected at network/firewall layers.


Path traversal protections

Path safety is enforced at multiple points, not just at route parsing.

1) Route/project identifier validation

Project names must start with an alphanumeric character and may contain only letters, digits, '.', '_', and '-'.

# src/docsfy/main.py
def _validate_project_name(name: str) -> str:
    """Validate project name to prevent path traversal."""
    if not _re.match(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*$", name):
        raise HTTPException(status_code=400, detail=f"Invalid project name: '{name}'")
    return name

2) Filesystem segment validation for project paths

owner, ai_provider, and ai_model path segments are rejected if they contain '/', '\', or '..', or if they start with '.'. An empty owner falls back to _default.

# src/docsfy/storage.py
def _validate_owner(owner: str) -> str:
    """Validate owner segment to prevent path traversal."""
    if not owner:
        return "_default"
    if "/" in owner or "\\" in owner or ".." in owner or owner.startswith("."):
        msg = f"Invalid owner: '{owner}'"
        raise ValueError(msg)
    return owner

def get_project_dir(
    name: str, ai_provider: str = "", ai_model: str = "", owner: str = ""
) -> Path:
    """Build the directory path for a project variant.

    The result is ``PROJECTS_DIR / owner / name / ai_provider / ai_model``,
    with the owner and name passed through ``_validate_owner`` and
    ``_validate_name`` respectively.

    Raises:
        ValueError: If ``ai_provider`` or ``ai_model`` contains ``/``, ``\\``
            or ``..``, or starts with ``.``. The owner/name validators may
            raise as well.
    """
    unsafe_tokens = ("/", "\\", "..")
    # Reject traversal-capable segments before any path is assembled.
    for label, value in (("ai_provider", ai_provider), ("ai_model", ai_model)):
        contains_unsafe = any(token in value for token in unsafe_tokens)
        if contains_unsafe or value.startswith("."):
            raise ValueError(f"Invalid {label}: '{value}'")
    owner_segment = _validate_owner(owner)
    name_segment = _validate_name(name)
    return PROJECTS_DIR / owner_segment / name_segment / ai_provider / ai_model

3) Canonical path boundary checks when serving docs

Even with validated project names, requested file paths are resolved and forced to stay inside site_dir.

# src/docsfy/main.py
file_path = site_dir / path
try:
    file_path.resolve().relative_to(site_dir.resolve())
except ValueError as exc:
    raise HTTPException(status_code=403, detail="Access denied") from exc
if not file_path.exists() or not file_path.is_file():
    raise HTTPException(status_code=404, detail="File not found")
return FileResponse(file_path)

4) Slug validation before cache/file writes and deletes

Generation/render steps reject or skip path-unsafe slugs.

# src/docsfy/generator.py
if "/" in slug or "\\" in slug or slug.startswith(".") or ".." in slug:
    msg = f"Invalid page slug: '{slug}'"
    raise ValueError(msg)

# src/docsfy/renderer.py
for slug, content in pages.items():
    if "/" in slug or "\\" in slug or slug.startswith(".") or ".." in slug:
        logger.warning(f"Skipping invalid slug: {slug}")
    else:
        valid_pages[slug] = content
# src/docsfy/main.py
if (
    "/" in slug
    or "\\" in slug
    or ".." in slug
    or slug.startswith(".")
):
    logger.warning(
        f"[{project_name}] Skipping invalid slug from incremental planner: {slug}"
    )
    continue
cache_file = cache_dir / f"{slug}.md"
try:
    cache_file.resolve().relative_to(cache_dir.resolve())
except ValueError:
    logger.warning(f"[{project_name}] Path traversal attempt in slug: {slug}")
    continue

HTML sanitization

AI-generated markdown is converted to HTML and then sanitized before rendering.

Sanitization behavior

  • Removes <script> blocks
  • Removes <iframe>, <object>, <embed>, and <form> tags
  • Strips inline event handlers (onclick, onerror, etc.)
  • Rewrites unsafe href/src values to #
  • Allows only http://, https://, #, /, and mailto:
# src/docsfy/renderer.py
def _sanitize_html(html: str) -> str:
    # Remove script tags and content
    html = re.sub(
        r"<script[^>]*>.*?</script>", "", html, flags=re.DOTALL | re.IGNORECASE
    )
    # Remove iframe, object, embed, form tags
    for tag in ["iframe", "object", "embed", "form"]:
        html = re.sub(
            rf"<{tag}[^>]*>.*?</{tag}>", "", html, flags=re.DOTALL | re.IGNORECASE
        )
        html = re.sub(rf"<{tag}[^>]*/>", "", html, flags=re.IGNORECASE)

    # Remove event handler attributes
    html = re.sub(r'\s+on\w+\s*=\s*["\'][^"\']*["\']', "", html, flags=re.IGNORECASE)
    html = re.sub(r"\s+on\w+\s*=\s*\S+", "", html, flags=re.IGNORECASE)

    # href/src allowlist; block non-allowed schemes by rewriting to "#"
    # ...
# src/docsfy/renderer.py
def _md_to_html(md_text: str) -> tuple[str, str]:
    """Convert markdown text to sanitized HTML and table-of-contents HTML.

    Args:
        md_text: Markdown source to render.

    Returns:
        A ``(content_html, toc_html)`` tuple. ``content_html`` is the
        converted markdown after ``_sanitize_html``; ``toc_html`` is the
        converter's ``toc`` attribute, or ``""`` when it is absent.
    """
    enabled_extensions = ["fenced_code", "codehilite", "tables", "toc"]
    per_extension_options = {
        "codehilite": {"css_class": "highlight", "guess_lang": False},
        "toc": {"toc_depth": "2-3"},
    }
    converter = markdown.Markdown(
        extensions=enabled_extensions,
        extension_configs=per_extension_options,
    )
    raw_html = converter.convert(md_text)
    # The toc attribute is read only after convert() has run.
    return _sanitize_html(raw_html), getattr(converter, "toc", "")

Page rendering uses |safe intentionally, after sanitization:

<!-- src/docsfy/templates/page.html -->
<!-- SECURITY: |safe is intentional. Content is AI-generated markdown
     converted to HTML server-side by the markdown library -->
{{ content | safe }}

Automated tests validate the sanitizer behavior:

# tests/test_renderer.py
result = _sanitize_html('<a href="javascript:alert(1)">click</a>')
assert "javascript:" not in result

result = _sanitize_html('<img src="x" onerror="alert(1)">')
assert "onerror" not in result

content_html, _ = _md_to_html('# Title\n\n<script>alert("xss")</script>\n\nSafe content.')
assert "<script" not in content_html

Warning: Sanitization is regex-based in renderer.py; keep dependency and test updates frequent, because browser parsing edge cases evolve over time.


Audit logging points

Security-sensitive actions are logged with a consistent [AUDIT] prefix.

Logged events

Area | Endpoint / action | Logged message pattern
Failed authentication | POST /login (invalid creds) | "[AUDIT] Failed login attempt for username '...'"
User lifecycle | POST /api/admin/users, DELETE /api/admin/users/{username} | Admin actor + target username + role
Access control changes | POST /api/admin/projects/{name}/access, DELETE /api/admin/projects/{name}/access/{username} | Admin actor + target user + project + owner scope
Key rotation | POST /api/me/rotate-key, POST /api/admin/users/{username}/rotate-key | Actor + target username
# src/docsfy/main.py
safe_username = username.replace("\n", "").replace("\r", "")[:100]
logger.info(f"[AUDIT] Failed login attempt for username '{safe_username}'")

logger.info(
    f"[AUDIT] User '{request.state.username}' created user '{username}' with role '{role}'"
)
logger.info(f"[AUDIT] User '{request.state.username}' deleted user '{username}'")

logger.info(
    f"[AUDIT] Admin '{request.state.username}' granted '{username}' access to '{name}' (owner: '{project_owner}')"
)
logger.info(
    f"[AUDIT] Admin '{request.state.username}' revoked '{username}' access to '{name}' (owner: '{project_owner}')"
)

logger.info(f"[AUDIT] User '{username}' rotated their own API key")
logger.info(
    f"[AUDIT] Admin '{request.state.username}' rotated API key for user '{username}'"
)

Tip: Route [AUDIT] records to centralized logging/SIEM and alert on repeated failed logins, key rotations, and privilege/access changes.


Security-relevant configuration and pipeline checks

Runtime configuration

# src/docsfy/main.py
if not settings.admin_key:
    logger.error("ADMIN_KEY environment variable is required")
    raise SystemExit(1)

if len(settings.admin_key) < 16:
    logger.error("ADMIN_KEY must be at least 16 characters long")
    raise SystemExit(1)
# .env.example
# REQUIRED - Admin key for user management (minimum 16 characters)
ADMIN_KEY=your-secure-admin-key-here-min-16-chars

# Set to false for local HTTP development
# SECURE_COOKIES=false

Pre-commit/CI security gates

# .pre-commit-config.yaml
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    hooks:
      - id: detect-private-key
  - repo: https://github.com/Yelp/detect-secrets
    hooks:
      - id: detect-secrets
  - repo: https://github.com/gitleaks/gitleaks
    hooks:
      - id: gitleaks
# tox.toml
[env.unittests]
deps = ["uv"]
commands = [["uv", "run", "--extra", "dev", "pytest", "-n", "auto", "tests"]]
# .gitleaks.toml
[extend]
useDefault = true

Note: No repository-hosted workflow files (.github/workflows, .gitlab-ci.yml, or Jenkinsfile) are present; these checks are configured for pre-commit and can be enforced by external CI orchestration.