Security Controls
docsfy implements layered controls around repository input, filesystem access, rendered HTML, and operational auditability.
SSRF checks
/api/generate applies two validation layers before cloning remote repositories:
- Schema-level URL validation (GenerateRequest) limits accepted formats to Git-style HTTPS/SSH URLs.
- Runtime SSRF guard (_reject_private_url) blocks localhost/private targets, including DNS names that resolve to private IPs.
# src/docsfy/models.py
@field_validator("repo_url")
@classmethod
def validate_repo_url(cls, v: str | None) -> str | None:
if v is None:
return v
https_pattern = r"^https?://[\w.\-]+/[\w.\-]+/[\w.\-]+(\.git)?$"
ssh_pattern = r"^git@[\w.\-]+:[\w.\-]+/[\w.\-]+(\.git)?$"
if not re.match(https_pattern, v) and not re.match(ssh_pattern, v):
msg = f"Invalid git repository URL: '{v}'"
raise ValueError(msg)
return v
# src/docsfy/main.py
if gen_request.repo_url:
await _reject_private_url(gen_request.repo_url)
# ...
if hostname in ("localhost", "127.0.0.1", "::1", "0.0.0.0"):
raise HTTPException(
status_code=400,
detail="Repository URL must not target localhost or private networks",
)
# Check if hostname is an IP address in private range
try:
addr = ipaddress.ip_address(hostname)
if not addr.is_global:
raise HTTPException(
status_code=400,
detail="Repository URL must not target localhost or private networks",
)
except ValueError:
# hostname is a DNS name - resolve and check
resolved = await loop.run_in_executor(
None, socket.getaddrinfo, hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM
)
for _family, _socktype, _proto, _canonname, sockaddr in resolved:
ip_str = sockaddr[0]
addr = ipaddress.ip_address(ip_str)
if not addr.is_global:
raise HTTPException(
status_code=400,
detail="Repository URL resolves to a private network address",
)
Test coverage includes explicit SSRF assertions:
# tests/test_main.py
with pytest.raises(HTTPException) as exc_info:
await _reject_private_url("https://evil.com/org/repo")
assert exc_info.value.status_code == 400
response = await client.post(
"/api/generate",
json={"repo_url": "https://localhost/org/repo.git"},
)
assert response.status_code in (400, 422)
Note: _reject_private_url is intentionally described in-code as basic SSRF mitigation; deeper controls (for example, DNS rebinding defenses) are expected at network/firewall layers.
Path traversal protections
Path safety is enforced at multiple points, not just at route parsing.
1) Route/project identifier validation
Project names are constrained to alphanumeric + . _ - patterns.
# src/docsfy/main.py
def _validate_project_name(name: str) -> str:
"""Validate project name to prevent path traversal."""
if not _re.match(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*$", name):
raise HTTPException(status_code=400, detail=f"Invalid project name: '{name}'")
return name
2) Filesystem segment validation for project paths
owner, ai_provider, and ai_model path segments are rejected if they contain traversal markers.
# src/docsfy/storage.py
def _validate_owner(owner: str) -> str:
"""Validate owner segment to prevent path traversal."""
if not owner:
return "_default"
if "/" in owner or "\\" in owner or ".." in owner or owner.startswith("."):
msg = f"Invalid owner: '{owner}'"
raise ValueError(msg)
return owner
def get_project_dir(
name: str, ai_provider: str = "", ai_model: str = "", owner: str = ""
) -> Path:
# Sanitize path segments to prevent traversal
for segment_name, segment in [("ai_provider", ai_provider), ("ai_model", ai_model)]:
if (
"/" in segment
or "\\" in segment
or ".." in segment
or segment.startswith(".")
):
msg = f"Invalid {segment_name}: '{segment}'"
raise ValueError(msg)
safe_owner = _validate_owner(owner)
return PROJECTS_DIR / safe_owner / _validate_name(name) / ai_provider / ai_model
3) Canonical path boundary checks when serving docs
Even with validated project names, requested file paths are resolved and forced to stay inside site_dir.
# src/docsfy/main.py
file_path = site_dir / path
try:
file_path.resolve().relative_to(site_dir.resolve())
except ValueError as exc:
raise HTTPException(status_code=403, detail="Access denied") from exc
if not file_path.exists() or not file_path.is_file():
raise HTTPException(status_code=404, detail="File not found")
return FileResponse(file_path)
4) Slug validation before cache/file writes and deletes
Generation/render steps reject or skip path-unsafe slugs.
# src/docsfy/generator.py
if "/" in slug or "\\" in slug or slug.startswith(".") or ".." in slug:
msg = f"Invalid page slug: '{slug}'"
raise ValueError(msg)
# src/docsfy/renderer.py
for slug, content in pages.items():
if "/" in slug or "\\" in slug or slug.startswith(".") or ".." in slug:
logger.warning(f"Skipping invalid slug: {slug}")
else:
valid_pages[slug] = content
# src/docsfy/main.py
if (
"/" in slug
or "\\" in slug
or ".." in slug
or slug.startswith(".")
):
logger.warning(
f"[{project_name}] Skipping invalid slug from incremental planner: {slug}"
)
continue
cache_file = cache_dir / f"{slug}.md"
try:
cache_file.resolve().relative_to(cache_dir.resolve())
except ValueError:
logger.warning(f"[{project_name}] Path traversal attempt in slug: {slug}")
continue
HTML sanitization
AI-generated markdown is converted to HTML and then sanitized before rendering.
Sanitization behavior
- Removes <script> blocks
- Removes <iframe>, <object>, <embed>, and <form> tags
- Strips inline event handlers (onclick, onerror, etc.)
- Rewrites unsafe href/src values to #
- Allows only http://, https://, #, /, and mailto:
# src/docsfy/renderer.py
def _sanitize_html(html: str) -> str:
# Remove script tags and content
html = re.sub(
r"<script[^>]*>.*?</script>", "", html, flags=re.DOTALL | re.IGNORECASE
)
# Remove iframe, object, embed, form tags
for tag in ["iframe", "object", "embed", "form"]:
html = re.sub(
rf"<{tag}[^>]*>.*?</{tag}>", "", html, flags=re.DOTALL | re.IGNORECASE
)
html = re.sub(rf"<{tag}[^>]*/>", "", html, flags=re.IGNORECASE)
# Remove event handler attributes
html = re.sub(r'\s+on\w+\s*=\s*["\'][^"\']*["\']', "", html, flags=re.IGNORECASE)
html = re.sub(r"\s+on\w+\s*=\s*\S+", "", html, flags=re.IGNORECASE)
# href/src allowlist; block non-allowed schemes by rewriting to "#"
# ...
# src/docsfy/renderer.py
def _md_to_html(md_text: str) -> tuple[str, str]:
md = markdown.Markdown(
extensions=["fenced_code", "codehilite", "tables", "toc"],
extension_configs={
"codehilite": {"css_class": "highlight", "guess_lang": False},
"toc": {"toc_depth": "2-3"},
},
)
content_html = _sanitize_html(md.convert(md_text))
toc_html = getattr(md, "toc", "")
return content_html, toc_html
Page rendering uses |safe intentionally, after sanitization:
<!-- src/docsfy/templates/page.html -->
<!-- SECURITY: |safe is intentional. Content is AI-generated markdown
converted to HTML server-side by the markdown library -->
{{ content | safe }}
Automated tests validate the sanitizer behavior:
# tests/test_renderer.py
result = _sanitize_html('<a href="javascript:alert(1)">click</a>')
assert "javascript:" not in result
result = _sanitize_html('<img src="x" onerror="alert(1)">')
assert "onerror" not in result
content_html, _ = _md_to_html('# Title\n\n<script>alert("xss")</script>\n\nSafe content.')
assert "<script" not in content_html
Warning: Sanitization is regex-based in renderer.py; keep dependency and test updates frequent, because browser parsing edge cases evolve over time.
Audit logging points
Security-sensitive actions are logged with a consistent [AUDIT] prefix.
Logged events
| Area | Endpoint / action | Logged message pattern |
|---|---|---|
| Failed authentication | POST /login (invalid creds) | "[AUDIT] Failed login attempt for username '...'" |
| User lifecycle | POST /api/admin/users, DELETE /api/admin/users/{username} | Admin actor + target username + role |
| Access control changes | POST /api/admin/projects/{name}/access, DELETE /api/admin/projects/{name}/access/{username} | Admin actor + target user + project + owner scope |
| Key rotation | POST /api/me/rotate-key, POST /api/admin/users/{username}/rotate-key | Actor + target username |
# src/docsfy/main.py
safe_username = username.replace("\n", "").replace("\r", "")[:100]
logger.info(f"[AUDIT] Failed login attempt for username '{safe_username}'")
logger.info(
f"[AUDIT] User '{request.state.username}' created user '{username}' with role '{role}'"
)
logger.info(f"[AUDIT] User '{request.state.username}' deleted user '{username}'")
logger.info(
f"[AUDIT] Admin '{request.state.username}' granted '{username}' access to '{name}' (owner: '{project_owner}')"
)
logger.info(
f"[AUDIT] Admin '{request.state.username}' revoked '{username}' access to '{name}' (owner: '{project_owner}')"
)
logger.info(f"[AUDIT] User '{username}' rotated their own API key")
logger.info(
f"[AUDIT] Admin '{request.state.username}' rotated API key for user '{username}'"
)
Tip: Route [AUDIT] records to centralized logging/SIEM and alert on repeated failed logins, key rotations, and privilege/access changes.
Security-relevant configuration and pipeline checks
Runtime configuration
# src/docsfy/main.py
if not settings.admin_key:
logger.error("ADMIN_KEY environment variable is required")
raise SystemExit(1)
if len(settings.admin_key) < 16:
logger.error("ADMIN_KEY must be at least 16 characters long")
raise SystemExit(1)
# .env.example
# REQUIRED - Admin key for user management (minimum 16 characters)
ADMIN_KEY=your-secure-admin-key-here-min-16-chars
# Set to false for local HTTP development
# SECURE_COOKIES=false
Pre-commit/CI security gates
# .pre-commit-config.yaml
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
hooks:
- id: detect-private-key
- repo: https://github.com/Yelp/detect-secrets
hooks:
- id: detect-secrets
- repo: https://github.com/gitleaks/gitleaks
hooks:
- id: gitleaks
# tox.toml
[env.unittests]
deps = ["uv"]
commands = [["uv", "run", "--extra", "dev", "pytest", "-n", "auto", "tests"]]
# .gitleaks.toml
[extend]
useDefault = true
Note: No repository-hosted workflow files (.github/workflows, .gitlab-ci.yml, or Jenkinsfile) are present; these checks are configured for pre-commit and can be enforced by external CI orchestration.