Error: Docker daemon not running
Solution:
# Start Docker
# Windows (PowerShell)
Start-Service Docker
# macOS
open /Applications/Docker.app
# Linux
sudo systemctl start docker
Error: Ports already in use or Cannot connect to Docker daemon
Solution:
# Find process using port
netstat -tulpn | grep :8000
# Kill process
kill -9 <PID>
# Or change port in .env
# Restart containers
docker compose restart
Error: OOMKilled or Cannot allocate memory
Solution:
# Check per-container memory usage
docker stats --no-stream
# Check Docker disk usage
docker system df
# Increase Docker memory in settings
# Settings > Resources > Memory: 4GB+
# Clear unused containers/images
docker system prune -a
Error: connection refused or authentication failed
Solution:
# Check database is running
docker compose ps postgres
# Check logs
docker compose logs postgres
# Verify credentials in .env
grep DATABASE_URL .env
# Test connection
docker compose exec postgres psql -U heimdall_user -d heimdall -c "SELECT 1"
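If psql inside the container works but services still fail, test the exact URL they use. A minimal sketch with SQLAlchemy, assuming DATABASE_URL is a standard SQLAlchemy-style connection string loaded from .env:
import os
from sqlalchemy import create_engine, text

# Uses the same DATABASE_URL as the services (export it or load .env first)
engine = create_engine(os.environ["DATABASE_URL"])
with engine.connect() as conn:
    print(conn.execute(text("SELECT 1")).scalar())  # prints 1 if host and credentials are correct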
Error: Alembic migration error or Schema mismatch
Solution:
# Check migration status
alembic current
# View migrations
ls db/migrations/versions/
# Rollback failed migration
alembic downgrade -1
# Re-run migration
alembic upgrade head
# If stuck, reset database
make db-reset # WARNING: Deletes all data!
Error: API responses slow or timeouts
Solution:
-- Check slow queries (requires the pg_stat_statements extension)
SELECT query, calls, mean_exec_time FROM pg_stat_statements
WHERE mean_exec_time > 1000
ORDER BY mean_exec_time DESC;
-- Note: on PostgreSQL 12 and earlier the column is named mean_time
-- Create missing indexes
CREATE INDEX idx_signal_measurements_created_at
ON signal_measurements(created_at DESC);
-- Analyze table stats
ANALYZE signal_measurements;
-- Vacuum to reclaim space
VACUUM signal_measurements;
Error: 404 Not Found
Solution:
# Check API is running
curl http://localhost:8000/health
# Check endpoint is correct
curl http://localhost:8000/api/v1/tasks
# View available routes
curl http://localhost:8000/docs
# Check logs for routing errors
docker compose logs api-gateway | grep -i error
Error: 504 Gateway Timeout or Request timed out
Solution:
# Check worker pool size
docker compose ps | grep worker
# Scale up workers
docker compose up -d --scale worker=5
# Check queue depth
docker compose exec rabbitmq rabbitmqctl list_queues name messages
# Increase timeout in config
# Set REQUEST_TIMEOUT=300 in .env
Error: 401 Unauthorized or 403 Forbidden
Solution:
# Verify API key
echo $API_KEY
# Check token expiration
curl -H "Authorization: Bearer $TOKEN" http://localhost:8000/health
# Generate new token
python -c "import secrets; print(secrets.token_urlsafe(32))"
# Update .env
API_KEY=new_key_here
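After rotating the key, it is worth confirming a request actually authenticates. A rough sketch with the requests library; the Bearer header mirrors the curl example above, and /api/v1/tasks is the endpoint used elsewhere in this guide:
import os
import requests

api_key = os.environ["API_KEY"]  # same value as in .env
resp = requests.get(
    "http://localhost:8000/api/v1/tasks",
    headers={"Authorization": f"Bearer {api_key}"},
    timeout=10,
)
print(resp.status_code)  # expect 200; 401/403 means the key or token is still rejected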
Error: Connection refused or HTTP 503 Service Unavailable
Solution:
# Check WebSDR status
curl http://sdr1.ik1jns.it:8076/status
# Verify network connectivity
ping sdr1.ik1jns.it
# Check firewall rules
sudo ufw allow out to any port 8076
# Use VPN if needed (some stations block certain regions)
Error: No valid signals detected
Solution:
Error: Intermittent timeouts, partial data
Solution:
# Increase retry count in config
WEBSDR_CONFIG = {
    'retry_count': 5,  # Increase from 3
    'timeout': 15,     # Increase from 10
}
# Use exponential backoff between retries (see the sketch after this block)
# Check network connectivity
docker compose exec api-gateway ping 8.8.8.8
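A minimal sketch of the exponential backoff mentioned above; the helper name and URL are illustrative, and the defaults mirror the retry_count/timeout values in WEBSDR_CONFIG:
import time
import requests

def fetch_with_backoff(url, retries=5, timeout=15, base_delay=1.0):
    """Retry a flaky WebSDR request, doubling the wait after each failure."""
    for attempt in range(retries):
        try:
            resp = requests.get(url, timeout=timeout)
            resp.raise_for_status()
            return resp.content
        except requests.RequestException:
            if attempt == retries - 1:
                raise  # out of retries, surface the error
            time.sleep(base_delay * (2 ** attempt))  # 1s, 2s, 4s, 8s, ...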
Error: High CPU, slow responses
Solution:
# Check which process uses CPU
docker stats
# Profile slow operations
python -m cProfile -s cumulative script.py > profile.txt
# Check for long-running queries
docker compose logs ml-detector | grep -i slow
Error: OOM errors or memory leak
Solution:
# Check memory per container
docker stats
# Identify memory leak
# Look for unbounded arrays or caches (see the tracemalloc sketch after this block)
# Clear cache
docker compose exec redis redis-cli FLUSHALL
# Limit memory per container
# Update docker-compose.yml with mem_limit
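To locate unbounded growth inside a Python service, the standard-library tracemalloc module points at the allocation sites; a sketch to drop into the suspect code path:
import tracemalloc

tracemalloc.start()
# ... exercise the suspect code path here ...
snapshot = tracemalloc.take_snapshot()
for stat in snapshot.statistics("lineno")[:10]:
    print(stat)  # top allocation sites; lines that keep growing suggest a leak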
Error: GPU utilization 0%, CUDA errors
Solution:
# Check GPU is visible
nvidia-smi
# Check CUDA available
python -c "import torch; print(torch.cuda.is_available())"
# Check driver
nvidia-smi --query-gpu=driver_version --format=csv,noheader
# Update drivers
# Follow nvidia.com instructions for your OS
# Force GPU usage in config
INFERENCE_DEVICE=cuda
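A quick check from Python of which device inference would actually use; a sketch with PyTorch that falls back to CPU when CUDA is unavailable (applying the INFERENCE_DEVICE override is up to the service config):
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("CUDA available:", torch.cuda.is_available())
if device.type == "cuda":
    print("GPU:", torch.cuda.get_device_name(0))
    print("CUDA version:", torch.version.cuda)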
Error: Connection refused or AMQP protocol error
Solution:
# Check RabbitMQ running
docker compose ps rabbitmq
# Check logs
docker compose logs rabbitmq
# Reset RabbitMQ (WARNING: Deletes all queues and messages!)
docker compose exec rabbitmq rabbitmqctl stop_app
docker compose exec rabbitmq rabbitmqctl reset
docker compose exec rabbitmq rabbitmqctl start_app
docker compose restart rabbitmq
# Monitor queue
docker compose exec rabbitmq rabbitmqctl list_queues
Error: Tasks not being processed
Solution:
# Check worker count
docker compose ps worker | wc -l
# Check queue size
docker compose exec rabbitmq rabbitmqctl list_queues name messages
# Purge queue (WARNING: Deletes tasks!)
docker compose exec rabbitmq rabbitmqctl purge_queue celery
# Restart workers
docker compose restart worker
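Queue depth can also be checked programmatically; a sketch with pika, assuming the default RabbitMQ credentials and the 'celery' queue name from the purge command above:
import pika

conn = pika.BlockingConnection(pika.ConnectionParameters(host="localhost"))
channel = conn.channel()
# passive=True only inspects the queue; it never creates it
q = channel.queue_declare(queue="celery", passive=True)
print("messages waiting:", q.method.message_count)
conn.close()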
Error: ConnectionRefusedError: [Errno 111] Connection refused
Solution:
# Check Redis running
docker compose ps redis
# Check port mapping
docker compose port redis 6379
# Check logs
docker compose logs redis
# Restart Redis
docker compose restart redis
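From the host, redis-py gives a one-line connectivity check; a sketch assuming the default 6379 port mapping:
import redis

r = redis.Redis(host="localhost", port=6379, db=0)
print(r.ping())  # True means Redis is reachable; a ConnectionError means it is not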
Error: OOM command not allowed when used memory > maxmemory
Solution:
# Check memory usage
docker compose exec redis redis-cli INFO memory
# Increase maxmemory
docker compose exec redis redis-cli CONFIG SET maxmemory 1gb
# Enable eviction policy
docker compose exec redis redis-cli CONFIG SET maxmemory-policy allkeys-lru
# Clear cache
docker compose exec redis redis-cli FLUSHALL
Error: Service container <name> failed or Failed to initialize container
Common Causes:
GitHub Actions service containers cannot run a custom startup command, which images like minio/minio (it needs server /data) require.
Solution:
Service containers have these limitations, so start the required containers manually in a run step instead:
Quick Fix Pattern:
# Don't use service containers for commands
steps:
  - name: Start MinIO
    run: |
      NETWORK=$(docker network ls --format '{{.Name}}' | grep github || echo "bridge")
      docker run -d --name minio --network "$NETWORK" \
        -p 9000:9000 minio/minio:latest \
        server /data --console-address ":9001"
      timeout 60 bash -c 'until curl -sf http://localhost:9000/minio/health/live; do sleep 2; done'
Error: Workflow exceeds time limit
Solution:
# Increase timeout in workflow
jobs:
  test:
    timeout-minutes: 30  # Increase as needed
Error: Environment variable undefined
Solution:
# Add secrets to GitHub repository
# Settings > Secrets and variables > Actions > New repository secret
# Reference in workflow
env:
  DATABASE_URL: ${{ secrets.DATABASE_URL }}
Error: Tests work on local machine but not in GitHub Actions
Solution:
# Match CI environment
docker compose -f docker-compose.test.yml up -d
# Use same Python version as CI
pyenv install 3.11.0
pyenv local 3.11.0
# Clear caches
rm -rf __pycache__ .pytest_cache
pip cache purge
# Run tests with same flags as CI
pytest -v --cov=src --cov-report=xml
# Check environment variables
env | grep -E 'DATABASE|REDIS|MINIO|KEYCLOAK'
# All containers
docker compose logs -f
# Specific service
docker compose logs -f api-gateway
# Last N lines
docker compose logs --tail 100 signal-processor
# Since specific time
docker compose logs --since 2025-10-22T10:00:00
# Edit .env
LOG_LEVEL=DEBUG
# Restart services
docker compose restart
# View detailed logs
docker compose logs -f | grep DEBUG
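Services typically translate LOG_LEVEL into standard logging configuration; a sketch of that wiring for reproducing the behaviour in a local script (the exact setup inside the services may differ):
import logging
import os

level = os.environ.get("LOG_LEVEL", "INFO").upper()  # same variable as in .env
logging.basicConfig(level=level, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
logging.getLogger(__name__).debug("debug logging enabled")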
# Using tcpdump
docker compose exec api-gateway tcpdump -i eth0 -w capture.pcap
# View with Wireshark
# Transfer file and open in Wireshark
#!/bin/bash
# Collect diagnostics for bug report
# System info
echo "=== System Info ===" > diagnostics.txt
uname -a >> diagnostics.txt
# Docker info
echo -e "\n=== Docker Info ===" >> diagnostics.txt
docker --version >> diagnostics.txt
docker compose version >> diagnostics.txt
# Container status
echo -e "\n=== Container Status ===" >> diagnostics.txt
docker compose ps >> diagnostics.txt
# Recent logs
echo -e "\n=== Recent Logs ===" >> diagnostics.txt
docker compose logs --tail 100 >> diagnostics.txt
# Environment (redacted)
echo -e "\n=== Environment ===" >> diagnostics.txt
grep -v PASSWORD .env | grep -v SECRET | grep -v API_KEY >> diagnostics.txt
Related: FAQ | Support