news 2026/4/3 6:25:45

DREAMVFIA Test Master Automated Testing Platform - Complete Open-Source Project Data and Code Package (Part 2 of 3)


张小明

Front-end Development Engineer


TestMaster Automated Testing Platform - Part 6: CI/CD Integration Configuration

2.6 CI/CD Integration Module

2.6.1 Jenkins Pipeline Configuration (Jenkinsfile)

```groovy
/**
 * TestMaster Automated Testing Platform - Jenkins Pipeline
 *
 * Features:
 * - Automated build and deployment
 * - Automated test execution
 * - Test report generation
 * - Quality gate checks
 */
pipeline {
    agent any

    environment {
        // Docker image configuration
        DOCKER_REGISTRY = 'registry.example.com'
        DOCKER_CREDENTIALS_ID = 'docker-registry-credentials'
        IMAGE_TAG = "${env.BUILD_NUMBER}"

        // Application configuration
        APP_NAME = 'testmaster'
        NAMESPACE = 'testmaster-prod'

        // Test configuration
        TEST_ENV = 'staging'
        TESTMASTER_API = 'http://testmaster-api:3000'

        // Notification configuration
        SLACK_CHANNEL = '#testmaster-ci'
        EMAIL_RECIPIENTS = 'team@example.com'
    }

    options {
        // Keep the last 30 builds
        buildDiscarder(logRotator(numToKeepStr: '30'))
        // Abort after 2 hours
        timeout(time: 2, unit: 'HOURS')
        // Disallow concurrent builds
        disableConcurrentBuilds()
        // Add timestamps to console output
        timestamps()
    }

    parameters {
        choice(
            name: 'DEPLOY_ENV',
            choices: ['dev', 'staging', 'production'],
            description: 'Deployment environment'
        )
        booleanParam(
            name: 'RUN_TESTS',
            defaultValue: true,
            description: 'Run automated tests'
        )
        booleanParam(
            name: 'RUN_PERFORMANCE_TESTS',
            defaultValue: false,
            description: 'Run performance tests'
        )
        string(
            name: 'TEST_SUITE',
            defaultValue: 'smoke',
            description: 'Test suite name (smoke/regression/full)'
        )
    }

    stages {
        stage('Checkout') {
            steps {
                script {
                    echo "🔄 Checking out code..."
                    checkout scm

                    // Collect Git metadata
                    env.GIT_COMMIT_SHORT = sh(
                        script: "git rev-parse --short HEAD",
                        returnStdout: true
                    ).trim()
                    env.GIT_COMMIT_MSG = sh(
                        script: "git log -1 --pretty=%B",
                        returnStdout: true
                    ).trim()
                    env.GIT_AUTHOR = sh(
                        script: "git log -1 --pretty=%an",
                        returnStdout: true
                    ).trim()
                }
            }
        }

        stage('Build') {
            parallel {
                stage('Build Frontend') {
                    steps {
                        script {
                            echo "🏗️ Building frontend..."
                            dir('frontend') {
                                sh '''
                                    npm ci
                                    npm run build
                                '''
                            }
                        }
                    }
                }
                stage('Build Backend') {
                    steps {
                        script {
                            echo "🏗️ Building backend..."
                            dir('backend/gateway') {
                                sh '''
                                    npm ci
                                    npm run build
                                '''
                            }
                        }
                    }
                }
                stage('Build Services') {
                    steps {
                        script {
                            echo "🏗️ Building services..."
                            // AI Generator
                            dir('backend/services/ai-generator') {
                                sh '''
                                    python -m venv venv
                                    . venv/bin/activate
                                    pip install -r requirements.txt
                                '''
                            }
                            // Executor
                            dir('backend/services/executor') {
                                sh '''
                                    python -m venv venv
                                    . venv/bin/activate
                                    pip install -r requirements.txt
                                '''
                            }
                            // Performance
                            dir('backend/services/performance') {
                                sh '''
                                    python -m venv venv
                                    . venv/bin/activate
                                    pip install -r requirements.txt
                                '''
                            }
                        }
                    }
                }
            }
        }

        stage('Unit Tests') {
            parallel {
                stage('Frontend Unit Tests') {
                    steps {
                        script {
                            echo "🧪 Running frontend unit tests..."
                            dir('frontend') {
                                sh 'npm run test:unit -- --coverage'
                            }
                        }
                    }
                    post {
                        always {
                            // Publish the coverage report
                            publishHTML([
                                reportDir: 'frontend/coverage',
                                reportFiles: 'index.html',
                                reportName: 'Frontend Coverage Report'
                            ])
                        }
                    }
                }
                stage('Backend Unit Tests') {
                    steps {
                        script {
                            echo "🧪 Running backend unit tests..."
                            dir('backend/gateway') {
                                sh 'npm run test -- --coverage'
                            }
                        }
                    }
                    post {
                        always {
                            // Publish the coverage report
                            publishHTML([
                                reportDir: 'backend/gateway/coverage',
                                reportFiles: 'index.html',
                                reportName: 'Backend Coverage Report'
                            ])
                        }
                    }
                }
                stage('Python Services Tests') {
                    steps {
                        script {
                            echo "🧪 Running Python services tests..."
                            sh '''
                                cd backend/services/ai-generator
                                . venv/bin/activate
                                pytest tests/ --cov=src --cov-report=html
                                cd ../executor
                                . venv/bin/activate
                                pytest tests/ --cov=src --cov-report=html
                                cd ../performance
                                . venv/bin/activate
                                pytest tests/ --cov=src --cov-report=html
                            '''
                        }
                    }
                }
            }
        }

        stage('Code Quality') {
            parallel {
                stage('ESLint') {
                    steps {
                        script {
                            echo "📊 Running ESLint..."
                            sh '''
                                cd frontend
                                npm run lint -- --format json --output-file eslint-report.json || true
                                cd ../backend/gateway
                                npm run lint -- --format json --output-file eslint-report.json || true
                            '''
                        }
                    }
                }
                stage('SonarQube') {
                    steps {
                        script {
                            echo "📊 Running SonarQube analysis..."
                            withSonarQubeEnv('SonarQube') {
                                sh '''
                                    sonar-scanner \
                                        -Dsonar.projectKey=testmaster \
                                        -Dsonar.sources=. \
                                        -Dsonar.host.url=${SONAR_HOST_URL} \
                                        -Dsonar.login=${SONAR_AUTH_TOKEN}
                                '''
                            }
                        }
                    }
                }
                stage('Security Scan') {
                    steps {
                        script {
                            echo "🔒 Running security scan..."
                            // npm audit
                            sh '''
                                cd frontend
                                npm audit --json > npm-audit-frontend.json || true
                                cd ../backend/gateway
                                npm audit --json > npm-audit-backend.json || true
                            '''
                            // Python safety check
                            sh '''
                                cd backend/services/ai-generator
                                . venv/bin/activate
                                safety check --json > safety-report.json || true
                            '''
                        }
                    }
                }
            }
        }

        stage('Build Docker Images') {
            steps {
                script {
                    echo "🐳 Building Docker images..."
                    // Build a Docker image for every service
                    def services = [
                        'frontend', 'gateway', 'ai-generator', 'executor', 'performance'
                    ]
                    services.each { service ->
                        sh """
                            docker build \
                                -t ${DOCKER_REGISTRY}/${APP_NAME}-${service}:${IMAGE_TAG} \
                                -t ${DOCKER_REGISTRY}/${APP_NAME}-${service}:latest \
                                -f docker/${service}/Dockerfile .
                        """
                    }
                }
            }
        }

        stage('Push Docker Images') {
            when {
                expression { params.DEPLOY_ENV != 'dev' }
            }
            steps {
                script {
                    echo "📤 Pushing Docker images..."
                    docker.withRegistry("https://${DOCKER_REGISTRY}", DOCKER_CREDENTIALS_ID) {
                        def services = [
                            'frontend', 'gateway', 'ai-generator', 'executor', 'performance'
                        ]
                        services.each { service ->
                            sh """
                                docker push ${DOCKER_REGISTRY}/${APP_NAME}-${service}:${IMAGE_TAG}
                                docker push ${DOCKER_REGISTRY}/${APP_NAME}-${service}:latest
                            """
                        }
                    }
                }
            }
        }

        stage('Deploy to Environment') {
            when {
                expression { params.DEPLOY_ENV != 'dev' }
            }
            steps {
                script {
                    echo "🚀 Deploying to ${params.DEPLOY_ENV}..."
                    // Deploy via Kubernetes
                    withKubeConfig([credentialsId: 'k8s-credentials']) {
                        sh """
                            # Update image tags
                            kubectl set image deployment/testmaster-frontend \
                                frontend=${DOCKER_REGISTRY}/${APP_NAME}-frontend:${IMAGE_TAG} \
                                -n ${NAMESPACE}
                            kubectl set image deployment/testmaster-gateway \
                                gateway=${DOCKER_REGISTRY}/${APP_NAME}-gateway:${IMAGE_TAG} \
                                -n ${NAMESPACE}
                            kubectl set image deployment/testmaster-ai-generator \
                                ai-generator=${DOCKER_REGISTRY}/${APP_NAME}-ai-generator:${IMAGE_TAG} \
                                -n ${NAMESPACE}
                            kubectl set image deployment/testmaster-executor \
                                executor=${DOCKER_REGISTRY}/${APP_NAME}-executor:${IMAGE_TAG} \
                                -n ${NAMESPACE}
                            kubectl set image deployment/testmaster-performance \
                                performance=${DOCKER_REGISTRY}/${APP_NAME}-performance:${IMAGE_TAG} \
                                -n ${NAMESPACE}

                            # Wait for the rollouts to finish
                            kubectl rollout status deployment/testmaster-frontend -n ${NAMESPACE}
                            kubectl rollout status deployment/testmaster-gateway -n ${NAMESPACE}
                            kubectl rollout status deployment/testmaster-ai-generator -n ${NAMESPACE}
                            kubectl rollout status deployment/testmaster-executor -n ${NAMESPACE}
                            kubectl rollout status deployment/testmaster-performance -n ${NAMESPACE}
                        """
                    }
                }
            }
        }

        stage('Smoke Tests') {
            when {
                expression { params.RUN_TESTS }
            }
            steps {
                script {
                    echo "🔥 Running smoke tests..."
                    // Trigger a smoke-test run through the TestMaster API
                    sh """
                        curl -X POST ${TESTMASTER_API}/api/executions/suite \
                            -H "Content-Type: application/json" \
                            -d '{
                                "suiteId": "smoke-tests",
                                "environment": "${params.DEPLOY_ENV}",
                                "browser": "chrome",
                                "triggeredBy": "jenkins",
                                "ciBuildId": "${env.BUILD_NUMBER}"
                            }' \
                            -o smoke-test-result.json

                        # Wait for the tests to finish
                        sleep 60

                        # Fetch the result
                        EXECUTION_ID=\$(cat smoke-test-result.json | jq -r '.executionId')
                        curl ${TESTMASTER_API}/api/executions/\${EXECUTION_ID} \
                            -o smoke-test-final.json

                        # Check whether the tests passed
                        STATUS=\$(cat smoke-test-final.json | jq -r '.status')
                        if [ "\$STATUS" != "passed" ]; then
                            echo "❌ Smoke tests failed!"
                            exit 1
                        fi
                        echo "✅ Smoke tests passed!"
                    """
                }
            }
            post {
                always {
                    archiveArtifacts artifacts: 'smoke-test-*.json', allowEmptyArchive: true
                }
            }
        }

        stage('Integration Tests') {
            when {
                expression { params.RUN_TESTS && params.TEST_SUITE != 'smoke' }
            }
            steps {
                script {
                    echo "🔗 Running integration tests..."
                    sh """
                        curl -X POST ${TESTMASTER_API}/api/executions/suite \
                            -H "Content-Type: application/json" \
                            -d '{
                                "suiteId": "integration-tests",
                                "environment": "${params.DEPLOY_ENV}",
                                "browser": "chrome",
                                "triggeredBy": "jenkins",
                                "ciBuildId": "${env.BUILD_NUMBER}"
                            }' \
                            -o integration-test-result.json

                        # Wait for the tests to finish
                        sleep 300

                        # Fetch the result
                        EXECUTION_ID=\$(cat integration-test-result.json | jq -r '.executionId')
                        curl ${TESTMASTER_API}/api/executions/\${EXECUTION_ID} \
                            -o integration-test-final.json

                        # Generate the test report
                        curl ${TESTMASTER_API}/api/reports/\${EXECUTION_ID} \
                            -o integration-test-report.html
                    """
                }
            }
            post {
                always {
                    publishHTML([
                        reportDir: '.',
                        reportFiles: 'integration-test-report.html',
                        reportName: 'Integration Test Report'
                    ])
                    archiveArtifacts artifacts: 'integration-test-*.json', allowEmptyArchive: true
                }
            }
        }

        stage('Performance Tests') {
            when {
                expression { params.RUN_PERFORMANCE_TESTS }
            }
            steps {
                script {
                    echo "⚡ Running performance tests..."
                    sh """
                        curl -X POST ${TESTMASTER_API}/api/performance/tests/start \
                            -H "Content-Type: application/json" \
                            -d '{
                                "test_id": "perf-test-${env.BUILD_NUMBER}",
                                "name": "CI Performance Test",
                                "target_url": "https://${params.DEPLOY_ENV}.testmaster.example.com",
                                "test_type": "load",
                                "duration": 300,
                                "users": 100,
                                "spawn_rate": 10,
                                "scenarios": [
                                    { "name": "Homepage", "method": "GET", "path": "/", "weight": 50 },
                                    { "name": "API Health", "method": "GET", "path": "/api/health", "weight": 50 }
                                ],
                                "runner": "locust"
                            }' \
                            -o performance-test-result.json

                        # Wait for the test to finish
                        sleep 360

                        # Fetch the report
                        TEST_ID=\$(cat performance-test-result.json | jq -r '.test_id')
                        curl ${TESTMASTER_API}/api/performance/tests/\${TEST_ID}/report \
                            -o performance-test-report.json

                        # Check whether performance meets the bar
                        SCORE=\$(cat performance-test-report.json | jq -r '.analysis.overall_score')
                        if [ "\$SCORE" -lt 60 ]; then
                            echo "⚠️ Performance score is below threshold: \$SCORE"
                            # Warn only; do not block the deployment
                        fi
                    """
                }
            }
            post {
                always {
                    archiveArtifacts artifacts: 'performance-test-*.json', allowEmptyArchive: true
                }
            }
        }

        stage('Quality Gate') {
            steps {
                script {
                    echo "🚪 Checking quality gate..."
                    // Wait for the SonarQube quality gate result
                    timeout(time: 10, unit: 'MINUTES') {
                        def qg = waitForQualityGate()
                        if (qg.status != 'OK') {
                            error "Quality gate failed: ${qg.status}"
                        }
                    }
                }
            }
        }
    }

    post {
        success {
            script {
                echo "✅ Pipeline succeeded!"
                // Send success notifications
                slackSend(
                    channel: env.SLACK_CHANNEL,
                    color: 'good',
                    message: """
                        ✅ *TestMaster Build Succeeded*
                        *Build:* #${env.BUILD_NUMBER}
                        *Environment:* ${params.DEPLOY_ENV}
                        *Commit:* ${env.GIT_COMMIT_SHORT}
                        *Author:* ${env.GIT_AUTHOR}
                        *Message:* ${env.GIT_COMMIT_MSG}
                        <${env.BUILD_URL}|View Build>
                    """.stripIndent()
                )
                emailext(
                    to: env.EMAIL_RECIPIENTS,
                    subject: "✅ TestMaster Build #${env.BUILD_NUMBER} Succeeded",
                    body: """
                        <h2>Build Succeeded</h2>
                        <p><strong>Build:</strong> #${env.BUILD_NUMBER}</p>
                        <p><strong>Environment:</strong> ${params.DEPLOY_ENV}</p>
                        <p><strong>Commit:</strong> ${env.GIT_COMMIT_SHORT}</p>
                        <p><strong>Author:</strong> ${env.GIT_AUTHOR}</p>
                        <p><strong>Message:</strong> ${env.GIT_COMMIT_MSG}</p>
                        <p><a href="${env.BUILD_URL}">View Build</a></p>
                    """,
                    mimeType: 'text/html'
                )
            }
        }
        failure {
            script {
                echo "❌ Pipeline failed!"
                // Send failure notifications
                slackSend(
                    channel: env.SLACK_CHANNEL,
                    color: 'danger',
                    message: """
                        ❌ *TestMaster Build Failed*
                        *Build:* #${env.BUILD_NUMBER}
                        *Environment:* ${params.DEPLOY_ENV}
                        *Commit:* ${env.GIT_COMMIT_SHORT}
                        *Author:* ${env.GIT_AUTHOR}
                        *Message:* ${env.GIT_COMMIT_MSG}
                        <${env.BUILD_URL}|View Build>
                    """.stripIndent()
                )
                emailext(
                    to: env.EMAIL_RECIPIENTS,
                    subject: "❌ TestMaster Build #${env.BUILD_NUMBER} Failed",
                    body: """
                        <h2>Build Failed</h2>
                        <p><strong>Build:</strong> #${env.BUILD_NUMBER}</p>
                        <p><strong>Environment:</strong> ${params.DEPLOY_ENV}</p>
                        <p><strong>Commit:</strong> ${env.GIT_COMMIT_SHORT}</p>
                        <p><strong>Author:</strong> ${env.GIT_AUTHOR}</p>
                        <p><strong>Message:</strong> ${env.GIT_COMMIT_MSG}</p>
                        <p><a href="${env.BUILD_URL}">View Build</a></p>
                    """,
                    mimeType: 'text/html'
                )
            }
        }
        always {
            // Clean the workspace
            cleanWs()
        }
    }
}
```
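A weak point of the smoke-test stage above is the fixed `sleep 60` followed by a single status fetch: a slow run is reported as failed even though it is still executing. A polling loop is more robust. The sketch below is not part of the original pipeline; the `poll_until_done` helper and its timeout values are illustrative, and the fetch command is passed in as a string so the loop stays independent of the exact TestMaster API shape.

```shell
#!/bin/sh
# poll_until_done FETCH_CMD TIMEOUT_SECONDS INTERVAL_SECONDS
# Repeatedly evaluates FETCH_CMD (which must print a status string)
# until it prints something other than "running", then echoes that
# final status. Prints "timeout" and returns 1 if the limit is hit.
poll_until_done() {
  fetch_cmd="$1"; timeout="$2"; interval="$3"
  elapsed=0
  while [ "$elapsed" -lt "$timeout" ]; do
    status=$(eval "$fetch_cmd")
    if [ "$status" != "running" ]; then
      echo "$status"
      return 0
    fi
    sleep "$interval"
    elapsed=$((elapsed + interval))
  done
  echo "timeout"
  return 1
}

# In the pipeline the fetch command would be something like (hypothetical
# endpoint shape taken from the stages above):
#   poll_until_done \
#     "curl -s $TESTMASTER_API/api/executions/$EXECUTION_ID | jq -r .status" \
#     600 10
```

The `sleep 60` plus single `curl` could then be replaced by one `poll_until_done` call, failing the stage whenever the returned status is anything other than `passed`.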

2.6.2 GitLab CI Configuration (.gitlab-ci.yml)

```yaml
# TestMaster Automated Testing Platform - GitLab CI/CD configuration

# Pipeline stages
stages:
  - build
  - test
  - quality
  - package
  - deploy
  - e2e-test
  - performance

# Global variables
variables:
  DOCKER_DRIVER: overlay2
  DOCKER_TLS_CERTDIR: "/certs"
  DOCKER_REGISTRY: registry.gitlab.com
  IMAGE_TAG: $CI_COMMIT_SHORT_SHA
  KUBERNETES_NAMESPACE: testmaster-$CI_ENVIRONMENT_NAME
  # Node.js version
  NODE_VERSION: "18"
  # Python version
  PYTHON_VERSION: "3.11"
  # Test configuration
  TESTMASTER_API: http://testmaster-api:3000

# Cache configuration
cache:
  key: ${CI_COMMIT_REF_SLUG}
  paths:
    - frontend/node_modules/
    - backend/gateway/node_modules/
    - backend/services/*/venv/

# ============================================================================
# Build stage
# ============================================================================
build:frontend:
  stage: build
  image: node:${NODE_VERSION}-alpine
  script:
    - echo "🏗️ Building frontend..."
    - cd frontend
    - npm ci
    - npm run build
  artifacts:
    paths:
      - frontend/dist/
    expire_in: 1 day
  only:
    - branches
    - tags

build:backend:
  stage: build
  image: node:${NODE_VERSION}-alpine
  script:
    - echo "🏗️ Building backend..."
    - cd backend/gateway
    - npm ci
    - npm run build
  artifacts:
    paths:
      - backend/gateway/dist/
    expire_in: 1 day
  only:
    - branches
    - tags

build:ai-generator:
  stage: build
  image: python:${PYTHON_VERSION}-slim
  script:
    - echo "🏗️ Building AI Generator service..."
    - cd backend/services/ai-generator
    - python -m venv venv
    - source venv/bin/activate
    - pip install -r requirements.txt
  artifacts:
    paths:
      - backend/services/ai-generator/venv/
    expire_in: 1 day
  only:
    - branches
    - tags

build:executor:
  stage: build
  image: python:${PYTHON_VERSION}-slim
  script:
    - echo "🏗️ Building Executor service..."
    - cd backend/services/executor
    - python -m venv venv
    - source venv/bin/activate
    - pip install -r requirements.txt
  artifacts:
    paths:
      - backend/services/executor/venv/
    expire_in: 1 day
  only:
    - branches
    - tags

build:performance:
  stage: build
  image: python:${PYTHON_VERSION}-slim
  script:
    - echo "🏗️ Building Performance service..."
    - cd backend/services/performance
    - python -m venv venv
    - source venv/bin/activate
    - pip install -r requirements.txt
  artifacts:
    paths:
      - backend/services/performance/venv/
    expire_in: 1 day
  only:
    - branches
    - tags

# ============================================================================
# Test stage
# ============================================================================
test:frontend:unit:
  stage: test
  image: node:${NODE_VERSION}-alpine
  dependencies:
    - build:frontend
  script:
    - echo "🧪 Running frontend unit tests..."
    - cd frontend
    - npm ci
    - npm run test:unit -- --coverage
  coverage: '/All files[^|]*\|[^|]*\s+([\d\.]+)/'
  artifacts:
    reports:
      coverage_report:
        coverage_format: cobertura
        path: frontend/coverage/cobertura-coverage.xml
    paths:
      - frontend/coverage/
    expire_in: 7 days
  only:
    - branches
    - merge_requests

test:backend:unit:
  stage: test
  image: node:${NODE_VERSION}-alpine
  dependencies:
    - build:backend
  script:
    - echo "🧪 Running backend unit tests..."
    - cd backend/gateway
    - npm ci
    - npm run test -- --coverage
  coverage: '/All files[^|]*\|[^|]*\s+([\d\.]+)/'
  artifacts:
    reports:
      coverage_report:
        coverage_format: cobertura
        path: backend/gateway/coverage/cobertura-coverage.xml
    paths:
      - backend/gateway/coverage/
    expire_in: 7 days
  only:
    - branches
    - merge_requests

test:services:unit:
  stage: test
  image: python:${PYTHON_VERSION}-slim
  dependencies:
    - build:ai-generator
    - build:executor
    - build:performance
  script:
    - echo "🧪 Running Python services unit tests..."
    # AI Generator
    - cd backend/services/ai-generator
    - source venv/bin/activate
    - pytest tests/ --cov=src --cov-report=xml --cov-report=html
    - cd ../../..
    # Executor
    - cd backend/services/executor
    - source venv/bin/activate
    - pytest tests/ --cov=src --cov-report=xml --cov-report=html
    - cd ../../..
    # Performance
    - cd backend/services/performance
    - source venv/bin/activate
    - pytest tests/ --cov=src --cov-report=xml --cov-report=html
  coverage: '/(?i)total.*? (100(?:\.0+)?\%|[1-9]?\d(?:\.\d+)?\%)$/'
  artifacts:
    reports:
      coverage_report:
        coverage_format: cobertura
        path: backend/services/*/coverage.xml
    paths:
      - backend/services/*/htmlcov/
    expire_in: 7 days
  only:
    - branches
    - merge_requests

# ============================================================================
# Code quality stage
# ============================================================================
quality:eslint:
  stage: quality
  image: node:${NODE_VERSION}-alpine
  script:
    - echo "📊 Running ESLint..."
    - cd frontend
    - npm ci
    - npm run lint -- --format json --output-file ../eslint-frontend.json || true
    - cd ../backend/gateway
    - npm ci
    - npm run lint -- --format json --output-file ../../eslint-backend.json || true
  artifacts:
    paths:
      - eslint-*.json
    expire_in: 7 days
  only:
    - branches
    - merge_requests

quality:sonarqube:
  stage: quality
  image: sonarsource/sonar-scanner-cli:latest
  variables:
    SONAR_USER_HOME: "${CI_PROJECT_DIR}/.sonar"
    GIT_DEPTH: "0"
  cache:
    key: "${CI_JOB_NAME}"
    paths:
      - .sonar/cache
  script:
    - echo "📊 Running SonarQube analysis..."
    - >
      sonar-scanner
      -Dsonar.projectKey=testmaster
      -Dsonar.sources=.
      -Dsonar.host.url=${SONAR_HOST_URL}
      -Dsonar.login=${SONAR_TOKEN}
      -Dsonar.javascript.lcov.reportPaths=frontend/coverage/lcov.info,backend/gateway/coverage/lcov.info
      -Dsonar.python.coverage.reportPaths=backend/services/*/coverage.xml
  only:
    - branches
    - merge_requests

quality:security:
  stage: quality
  image: node:${NODE_VERSION}-alpine
  script:
    - echo "🔒 Running security scan..."
    # npm audit
    - cd frontend
    - npm audit --json > ../npm-audit-frontend.json || true
    - cd ../backend/gateway
    - npm audit --json > ../../npm-audit-backend.json || true
    # Python safety check
    - cd ../../backend/services/ai-generator
    - source venv/bin/activate
    - pip install safety
    - safety check --json > ../../../safety-ai-generator.json || true
  artifacts:
    paths:
      - npm-audit-*.json
      - safety-*.json
    expire_in: 7 days
  only:
    - branches
    - merge_requests

# ============================================================================
# Package stage
# ============================================================================
package:docker:
  stage: package
  image: docker:latest
  services:
    - docker:dind
  before_script:
    - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
  script:
    - echo "🐳 Building and pushing Docker images..."
    # Frontend
    - docker build -t $CI_REGISTRY_IMAGE/frontend:$IMAGE_TAG -t $CI_REGISTRY_IMAGE/frontend:latest -f docker/frontend/Dockerfile .
    - docker push $CI_REGISTRY_IMAGE/frontend:$IMAGE_TAG
    - docker push $CI_REGISTRY_IMAGE/frontend:latest
    # Gateway
    - docker build -t $CI_REGISTRY_IMAGE/gateway:$IMAGE_TAG -t $CI_REGISTRY_IMAGE/gateway:latest -f docker/gateway/Dockerfile .
    - docker push $CI_REGISTRY_IMAGE/gateway:$IMAGE_TAG
    - docker push $CI_REGISTRY_IMAGE/gateway:latest
    # AI Generator
    - docker build -t $CI_REGISTRY_IMAGE/ai-generator:$IMAGE_TAG -t $CI_REGISTRY_IMAGE/ai-generator:latest -f docker/ai-generator/Dockerfile .
    - docker push $CI_REGISTRY_IMAGE/ai-generator:$IMAGE_TAG
    - docker push $CI_REGISTRY_IMAGE/ai-generator:latest
    # Executor
    - docker build -t $CI_REGISTRY_IMAGE/executor:$IMAGE_TAG -t $CI_REGISTRY_IMAGE/executor:latest -f docker/executor/Dockerfile .
    - docker push $CI_REGISTRY_IMAGE/executor:$IMAGE_TAG
    - docker push $CI_REGISTRY_IMAGE/executor:latest
    # Performance
    - docker build -t $CI_REGISTRY_IMAGE/performance:$IMAGE_TAG -t $CI_REGISTRY_IMAGE/performance:latest -f docker/performance/Dockerfile .
    - docker push $CI_REGISTRY_IMAGE/performance:$IMAGE_TAG
    - docker push $CI_REGISTRY_IMAGE/performance:latest
  only:
    - main
    - develop
    - tags

# ============================================================================
# Deploy stage
# ============================================================================
deploy:staging:
  stage: deploy
  image: bitnami/kubectl:latest
  environment:
    name: staging
    url: https://staging.testmaster.example.com
  before_script:
    - kubectl config use-context $KUBE_CONTEXT
  script:
    - echo "🚀 Deploying to staging..."
    # Update the Kubernetes deployments
    - kubectl set image deployment/testmaster-frontend frontend=$CI_REGISTRY_IMAGE/frontend:$IMAGE_TAG -n $KUBERNETES_NAMESPACE
    - kubectl set image deployment/testmaster-gateway gateway=$CI_REGISTRY_IMAGE/gateway:$IMAGE_TAG -n $KUBERNETES_NAMESPACE
    - kubectl set image deployment/testmaster-ai-generator ai-generator=$CI_REGISTRY_IMAGE/ai-generator:$IMAGE_TAG -n $KUBERNETES_NAMESPACE
    - kubectl set image deployment/testmaster-executor executor=$CI_REGISTRY_IMAGE/executor:$IMAGE_TAG -n $KUBERNETES_NAMESPACE
    - kubectl set image deployment/testmaster-performance performance=$CI_REGISTRY_IMAGE/performance:$IMAGE_TAG -n $KUBERNETES_NAMESPACE
    # Wait for the rollouts to finish
    - kubectl rollout status deployment/testmaster-frontend -n $KUBERNETES_NAMESPACE
    - kubectl rollout status deployment/testmaster-gateway -n $KUBERNETES_NAMESPACE
    - kubectl rollout status deployment/testmaster-ai-generator -n $KUBERNETES_NAMESPACE
    - kubectl rollout status deployment/testmaster-executor -n $KUBERNETES_NAMESPACE
    - kubectl rollout status deployment/testmaster-performance -n $KUBERNETES_NAMESPACE
  only:
    - develop

deploy:production:
  stage: deploy
  image: bitnami/kubectl:latest
  environment:
    name: production
    url: https://testmaster.example.com
  before_script:
    - kubectl config use-context $KUBE_CONTEXT
  script:
    - echo "🚀 Deploying to production..."
    # Update the Kubernetes deployments
    - kubectl set image deployment/testmaster-frontend frontend=$CI_REGISTRY_IMAGE/frontend:$IMAGE_TAG -n $KUBERNETES_NAMESPACE
    - kubectl set image deployment/testmaster-gateway gateway=$CI_REGISTRY_IMAGE/gateway:$IMAGE_TAG -n $KUBERNETES_NAMESPACE
    - kubectl set image deployment/testmaster-ai-generator ai-generator=$CI_REGISTRY_IMAGE/ai-generator:$IMAGE_TAG -n $KUBERNETES_NAMESPACE
    - kubectl set image deployment/testmaster-executor executor=$CI_REGISTRY_IMAGE/executor:$IMAGE_TAG -n $KUBERNETES_NAMESPACE
    - kubectl set image deployment/testmaster-performance performance=$CI_REGISTRY_IMAGE/performance:$IMAGE_TAG -n $KUBERNETES_NAMESPACE
    # Wait for the rollouts to finish
    - kubectl rollout status deployment/testmaster-frontend -n $KUBERNETES_NAMESPACE
    - kubectl rollout status deployment/testmaster-gateway -n $KUBERNETES_NAMESPACE
    - kubectl rollout status deployment/testmaster-ai-generator -n $KUBERNETES_NAMESPACE
    - kubectl rollout status deployment/testmaster-executor -n $KUBERNETES_NAMESPACE
    - kubectl rollout status deployment/testmaster-performance -n $KUBERNETES_NAMESPACE
  when: manual
  only:
    - main
    - tags

# ============================================================================
# E2E test stage
# ============================================================================
e2e:smoke:
  stage: e2e-test
  image: curlimages/curl:latest
  dependencies: []
  script:
    - echo "🔥 Running smoke tests..."
    # Trigger a smoke-test run through the TestMaster API
    - |
      curl -X POST $TESTMASTER_API/api/executions/suite \
        -H "Content-Type: application/json" \
        -d '{
          "suiteId": "smoke-tests",
          "environment": "staging",
          "browser": "chrome",
          "triggeredBy": "gitlab-ci",
          "ciBuildId": "'$CI_PIPELINE_ID'"
        }' \
        -o smoke-test-result.json
    # Wait for the tests to finish
    - sleep 60
    # Fetch the result
    - EXECUTION_ID=$(cat smoke-test-result.json | jq -r '.executionId')
    - curl $TESTMASTER_API/api/executions/$EXECUTION_ID -o smoke-test-final.json
    # Check whether the tests passed
    - STATUS=$(cat smoke-test-final.json | jq -r '.status')
    - |
      if [ "$STATUS" != "passed" ]; then
        echo "❌ Smoke tests failed!"
        exit 1
      fi
    - echo "✅ Smoke tests passed!"
  artifacts:
    paths:
      - smoke-test-*.json
    expire_in: 7 days
  only:
    - develop
    - main

# ============================================================================
# Performance test stage
# ============================================================================
performance:load:
  stage: performance
  image: curlimages/curl:latest
  dependencies: []
  script:
    - echo "⚡ Running performance tests..."
    # Start the performance test
    - |
      curl -X POST $TESTMASTER_API/api/performance/tests/start \
        -H "Content-Type: application/json" \
        -d '{
          "test_id": "perf-test-'$CI_PIPELINE_ID'",
          "name": "CI Performance Test",
          "target_url": "https://staging.testmaster.example.com",
          "test_type": "load",
          "duration": 300,
          "users": 100,
          "spawn_rate": 10,
          "scenarios": [
            { "name": "Homepage", "method": "GET", "path": "/", "weight": 50 },
            { "name": "API Health", "method": "GET", "path": "/api/health", "weight": 50 }
          ],
          "runner": "locust"
        }' \
        -o performance-test-result.json
    # Wait for the test to finish
    - sleep 360
    # Fetch the report
    - TEST_ID=$(cat performance-test-result.json | jq -r '.test_id')
    - curl $TESTMASTER_API/api/performance/tests/$TEST_ID/report -o performance-test-report.json
    # Check whether performance meets the bar
    - SCORE=$(cat performance-test-report.json | jq -r '.analysis.overall_score')
    - |
      if [ "$SCORE" -lt 60 ]; then
        echo "⚠️ Performance score is below threshold: $SCORE"
      fi
  artifacts:
    paths:
      - performance-test-*.json
    expire_in: 7 days
  when: manual
  only:
    - develop
    - main
```
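One detail worth noting in both the Jenkins and GitLab performance gates: `[ "$SCORE" -lt 60 ]` only accepts integers, so the job errors out if `.analysis.overall_score` ever comes back fractional (e.g. `72.5`). Whether the API returns floats is an assumption here, but an awk-based comparison handles both cases safely; `score_below` is an illustrative helper, not part of the original configs.

```shell
#!/bin/sh
# score_below SCORE THRESHOLD
# Exit 0 when SCORE < THRESHOLD, comparing numerically so fractional
# scores like "72.5" work where POSIX `[ -lt ]` would error out.
score_below() {
  awk -v s="$1" -v t="$2" 'BEGIN { exit !(s < t) }'
}

# Example gate, mirroring the performance jobs above. The literal
# score here stands in for:
#   SCORE=$(jq -r '.analysis.overall_score' performance-test-report.json)
SCORE="72.5"
if score_below "$SCORE" 60; then
  echo "⚠️ Performance score is below threshold: $SCORE"
else
  echo "✅ Performance score OK: $SCORE"
fi
```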

Continuing with the next part...

2.6.3 GitHub Actions Configuration (.github/workflows/ci-cd.yml)

name: TestMaster CI/CD on: push: branches: - main - develop tags: - 'v*' pull_request: branches: - main - develop env: NODE_VERSION: '18' PYTHON_VERSION: '3.11' DOCKER_REGISTRY: ghcr.io IMAGE_NAME: ${{ github.repository }} jobs: # ============================================================================ # 构建作业 # ============================================================================ build-frontend: name: Build Frontend runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v3 - name: Setup Node.js uses: actions/setup-node@v3 with: node-version: ${{ env.NODE_VERSION }} cache: 'npm' cache-dependency-path: frontend/package-lock.json - name: Install dependencies run: | cd frontend npm ci - name: Build run: | cd frontend npm run build - name: Upload build artifacts uses: actions/upload-artifact@v3 with: name: frontend-dist path: frontend/dist/ retention-days: 1 build-backend: name: Build Backend runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v3 - name: Setup Node.js uses: actions/setup-node@v3 with: node-version: ${{ env.NODE_VERSION }} cache: 'npm' cache-dependency-path: backend/gateway/package-lock.json - name: Install dependencies run: | cd backend/gateway npm ci - name: Build run: | cd backend/gateway npm run build - name: Upload build artifacts uses: actions/upload-artifact@v3 with: name: backend-dist path: backend/gateway/dist/ retention-days: 1 build-services: name: Build Python Services runs-on: ubuntu-latest strategy: matrix: service: [ai-generator, executor, performance] steps: - name: Checkout code uses: actions/checkout@v3 - name: Setup Python uses: actions/setup-python@v4 with: python-version: ${{ env.PYTHON_VERSION }} cache: 'pip' - name: Install dependencies run: | cd backend/services/${{ matrix.service }} python -m venv venv source venv/bin/activate pip install -r requirements.txt # ============================================================================ # 测试作业 # 
============================================================================ test-frontend: name: Test Frontend runs-on: ubuntu-latest needs: build-frontend steps: - name: Checkout code uses: actions/checkout@v3 - name: Setup Node.js uses: actions/setup-node@v3 with: node-version: ${{ env.NODE_VERSION }} cache: 'npm' cache-dependency-path: frontend/package-lock.json - name: Install dependencies run: | cd frontend npm ci - name: Run unit tests run: | cd frontend npm run test:unit -- --coverage - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 with: files: ./frontend/coverage/lcov.info flags: frontend name: frontend-coverage test-backend: name: Test Backend runs-on: ubuntu-latest needs: build-backend steps: - name: Checkout code uses: actions/checkout@v3 - name: Setup Node.js uses: actions/setup-node@v3 with: node-version: ${{ env.NODE_VERSION }} cache: 'npm' cache-dependency-path: backend/gateway/package-lock.json - name: Install dependencies run: | cd backend/gateway npm ci - name: Run unit tests run: | cd backend/gateway npm run test -- --coverage - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 with: files: ./backend/gateway/coverage/lcov.info flags: backend name: backend-coverage test-services: name: Test Python Services runs-on: ubuntu-latest needs: build-services strategy: matrix: service: [ai-generator, executor, performance] steps: - name: Checkout code uses: actions/checkout@v3 - name: Setup Python uses: actions/setup-python@v4 with: python-version: ${{ env.PYTHON_VERSION }} - name: Install dependencies run: | cd backend/services/${{ matrix.service }} python -m venv venv source venv/bin/activate pip install -r requirements.txt pip install pytest pytest-cov - name: Run unit tests run: | cd backend/services/${{ matrix.service }} source venv/bin/activate pytest tests/ --cov=src --cov-report=xml - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 with: files: ./backend/services/${{ matrix.service }}/coverage.xml 
flags: ${{ matrix.service }} name: ${{ matrix.service }}-coverage # ============================================================================ # 代码质量作业 # ============================================================================ lint: name: Lint Code runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v3 - name: Setup Node.js uses: actions/setup-node@v3 with: node-version: ${{ env.NODE_VERSION }} - name: Lint frontend run: | cd frontend npm ci npm run lint - name: Lint backend run: | cd backend/gateway npm ci npm run lint security-scan: name: Security Scan runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v3 - name: Run Trivy vulnerability scanner uses: aquasecurity/trivy-action@master with: scan-type: 'fs' scan-ref: '.' format: 'sarif' output: 'trivy-results.sarif' - name: Upload Trivy results to GitHub Security uses: github/codeql-action/upload-sarif@v2 with: sarif_file: 'trivy-results.sarif' # ============================================================================ # Docker 构建作业 # ============================================================================ build-docker: name: Build Docker Images runs-on: ubuntu-latest needs: [test-frontend, test-backend, test-services] if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/develop' || startsWith(github.ref, 'refs/tags/')) strategy: matrix: service: [frontend, gateway, ai-generator, executor, performance] steps: - name: Checkout code uses: actions/checkout@v3 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 - name: Log in to GitHub Container Registry uses: docker/login-action@v2 with: registry: ${{ env.DOCKER_REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Extract metadata id: meta uses: docker/metadata-action@v4 with: images: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}/${{ matrix.service }} tags: | type=ref,event=branch type=ref,event=pr 
```yaml
          type=semver,pattern={{version}}
          type=semver,pattern={{major}}.{{minor}}
          type=sha

    - name: Build and push
      uses: docker/build-push-action@v4
      with:
        context: .
        file: ./docker/${{ matrix.service }}/Dockerfile
        push: true
        tags: ${{ steps.meta.outputs.tags }}
        labels: ${{ steps.meta.outputs.labels }}
        cache-from: type=gha
        cache-to: type=gha,mode=max

  # ============================================================================
  # Deployment jobs
  # ============================================================================
  deploy-staging:
    name: Deploy to Staging
    runs-on: ubuntu-latest
    needs: build-docker
    if: github.ref == 'refs/heads/develop'
    environment:
      name: staging
      url: https://staging.testmaster.example.com
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Setup kubectl
        uses: azure/setup-kubectl@v3

      - name: Configure kubectl
        run: |
          echo "${{ secrets.KUBE_CONFIG }}" | base64 -d > kubeconfig
          # An `export` would not survive into later steps; persist via GITHUB_ENV
          echo "KUBECONFIG=$PWD/kubeconfig" >> "$GITHUB_ENV"

      - name: Deploy to Kubernetes
        run: |
          kubectl set image deployment/testmaster-frontend frontend=${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}/frontend:${{ github.sha }} -n testmaster-staging
          kubectl set image deployment/testmaster-gateway gateway=${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}/gateway:${{ github.sha }} -n testmaster-staging
          kubectl set image deployment/testmaster-ai-generator ai-generator=${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}/ai-generator:${{ github.sha }} -n testmaster-staging
          kubectl set image deployment/testmaster-executor executor=${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}/executor:${{ github.sha }} -n testmaster-staging
          kubectl set image deployment/testmaster-performance performance=${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}/performance:${{ github.sha }} -n testmaster-staging

          kubectl rollout status deployment/testmaster-frontend -n testmaster-staging
          kubectl rollout status deployment/testmaster-gateway -n testmaster-staging
          kubectl rollout status deployment/testmaster-ai-generator -n testmaster-staging
          kubectl rollout status deployment/testmaster-executor -n testmaster-staging
          kubectl rollout status deployment/testmaster-performance -n testmaster-staging

  deploy-production:
    name: Deploy to Production
    runs-on: ubuntu-latest
    needs: build-docker
    if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/')
    environment:
      name: production
      url: https://testmaster.example.com
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Setup kubectl
        uses: azure/setup-kubectl@v3

      - name: Configure kubectl
        run: |
          echo "${{ secrets.KUBE_CONFIG }}" | base64 -d > kubeconfig
          # An `export` would not survive into later steps; persist via GITHUB_ENV
          echo "KUBECONFIG=$PWD/kubeconfig" >> "$GITHUB_ENV"

      - name: Deploy to Kubernetes
        run: |
          kubectl set image deployment/testmaster-frontend frontend=${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}/frontend:${{ github.sha }} -n testmaster-production
          kubectl set image deployment/testmaster-gateway gateway=${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}/gateway:${{ github.sha }} -n testmaster-production
          kubectl set image deployment/testmaster-ai-generator ai-generator=${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}/ai-generator:${{ github.sha }} -n testmaster-production
          kubectl set image deployment/testmaster-executor executor=${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}/executor:${{ github.sha }} -n testmaster-production
          kubectl set image deployment/testmaster-performance performance=${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}/performance:${{ github.sha }} -n testmaster-production

          kubectl rollout status deployment/testmaster-frontend -n testmaster-production
          kubectl rollout status deployment/testmaster-gateway -n testmaster-production
          kubectl rollout status deployment/testmaster-ai-generator -n testmaster-production
          kubectl rollout status deployment/testmaster-executor -n testmaster-production
          kubectl rollout status deployment/testmaster-performance -n testmaster-production

  # ============================================================================
  # E2E test job
  # ============================================================================
  e2e-tests:
    name: E2E Tests
    runs-on: ubuntu-latest
    needs: deploy-staging
    if: github.ref == 'refs/heads/develop'
    steps:
      - name: Run smoke tests
        run: |
          curl -X POST https://staging.testmaster.example.com/api/executions/suite \
            -H "Content-Type: application/json" \
            -d '{
              "suiteId": "smoke-tests",
              "environment": "staging",
              "browser": "chrome",
              "triggeredBy": "github-actions",
              "ciBuildId": "${{ github.run_id }}"
            }' \
            -o smoke-test-result.json

          sleep 60

          EXECUTION_ID=$(jq -r '.executionId' smoke-test-result.json)
          curl https://staging.testmaster.example.com/api/executions/$EXECUTION_ID -o smoke-test-final.json

          STATUS=$(jq -r '.status' smoke-test-final.json)
          if [ "$STATUS" != "passed" ]; then
            echo "❌ Smoke tests failed!"
            exit 1
          fi
          echo "✅ Smoke tests passed!"

      - name: Upload test results
        uses: actions/upload-artifact@v3
        if: always()
        with:
          name: e2e-test-results
          path: smoke-test-*.json
```
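The smoke-test step above fires the suite, sleeps a fixed 60 seconds, then checks the status exactly once, which fails spuriously whenever a run takes longer than a minute. A polling loop is more robust. The sketch below is a minimal, hedged example; `wait_for_execution` and its parameters are illustrative names, not part of the TestMaster API, and the status fetcher is injected so it can wrap whatever HTTP client the pipeline uses against the `/api/executions/<id>` endpoint shown in the workflow.

```python
import time


def wait_for_execution(fetch_status, timeout=600, interval=10, sleep=time.sleep):
    """Poll fetch_status() until it returns a terminal state or timeout expires.

    fetch_status: zero-argument callable returning the execution's current
    status string (e.g. built on a GET of /api/executions/<id>).
    Returns the terminal status; raises TimeoutError if none is reached.
    """
    terminal = {"passed", "failed", "error", "cancelled"}
    waited = 0
    status = None
    while waited < timeout:
        status = fetch_status()
        if status in terminal:
            return status
        sleep(interval)
        waited += interval
    raise TimeoutError(f"execution still '{status}' after {timeout}s")
```

Exiting non-zero when the returned status is anything but `"passed"` preserves the workflow's existing quality-gate behaviour while removing the race.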

TestMaster Automated Testing Platform - Part 7: Complete Docker Compose Configuration

2.7 Docker Compose Configuration

2.7.1 Main Configuration File: docker-compose.yml

```yaml
# TestMaster Automated Testing Platform - Docker Compose configuration
# Version: 1.0.0
# Purpose: local development and test environment
version: '3.8'

# ============================================================================
# Network configuration
# ============================================================================
networks:
  testmaster-network:
    driver: bridge
    ipam:
      config:
        - subnet: 172.20.0.0/16

# ============================================================================
# Volume configuration
# ============================================================================
volumes:
  # Database data
  postgres-data:
    driver: local
  mongo-data:
    driver: local
  redis-data:
    driver: local
  # Message queue data
  rabbitmq-data:
    driver: local
  # Monitoring data
  prometheus-data:
    driver: local
  grafana-data:
    driver: local
  elasticsearch-data:
    driver: local
  # MinIO data
  minio-data:
    driver: local
  # Test reports
  test-reports:
    driver: local
  # Test recordings
  test-recordings:
    driver: local

# ============================================================================
# Services
# ============================================================================
services:
  # ==========================================================================
  # Database services
  # ==========================================================================

  # PostgreSQL - primary database
  postgres:
    image: postgres:15-alpine
    container_name: testmaster-postgres
    hostname: postgres
    restart: unless-stopped
    environment:
      POSTGRES_DB: testmaster
      POSTGRES_USER: testmaster
      POSTGRES_PASSWORD: testmaster_password_2024
      POSTGRES_INITDB_ARGS: "--encoding=UTF8 --locale=en_US.UTF-8"
      PGDATA: /var/lib/postgresql/data/pgdata
    ports:
      - "5432:5432"
    volumes:
      - postgres-data:/var/lib/postgresql/data
      - ./docker/postgres/init:/docker-entrypoint-initdb.d
    networks:
      - testmaster-network
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U testmaster"]
      interval: 10s
      timeout: 5s
      retries: 5
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # MongoDB - test data and logs
  mongodb:
    image: mongo:7
    container_name: testmaster-mongodb
    hostname: mongodb
    restart: unless-stopped
    environment:
      MONGO_INITDB_ROOT_USERNAME: testmaster
      MONGO_INITDB_ROOT_PASSWORD: testmaster_password_2024
      MONGO_INITDB_DATABASE: testmaster
    ports:
      - "27017:27017"
    volumes:
      - mongo-data:/data/db
      - ./docker/mongodb/init:/docker-entrypoint-initdb.d
    networks:
      - testmaster-network
    healthcheck:
      test: echo 'db.runCommand("ping").ok' | mongosh localhost:27017/test --quiet
      interval: 10s
      timeout: 5s
      retries: 5
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # Redis - cache and sessions
  redis:
    image: redis:7-alpine
    container_name: testmaster-redis
    hostname: redis
    restart: unless-stopped
    command: redis-server --requirepass testmaster_password_2024 --appendonly yes
    ports:
      - "6379:6379"
    volumes:
      - redis-data:/data
    networks:
      - testmaster-network
    healthcheck:
      # requirepass is set, so the healthcheck must authenticate
      test: ["CMD-SHELL", "redis-cli -a testmaster_password_2024 ping | grep -q PONG"]
      interval: 10s
      timeout: 5s
      retries: 5
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # ==========================================================================
  # Message queue services
  # ==========================================================================

  # RabbitMQ - message queue
  rabbitmq:
    image: rabbitmq:3.12-management-alpine
    container_name: testmaster-rabbitmq
    hostname: rabbitmq
    restart: unless-stopped
    environment:
      RABBITMQ_DEFAULT_USER: testmaster
      RABBITMQ_DEFAULT_PASS: testmaster_password_2024
      RABBITMQ_DEFAULT_VHOST: testmaster
    ports:
      - "5672:5672"    # AMQP
      - "15672:15672"  # Management UI
    volumes:
      - rabbitmq-data:/var/lib/rabbitmq
      - ./docker/rabbitmq/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf
    networks:
      - testmaster-network
    healthcheck:
      test: rabbitmq-diagnostics -q ping
      interval: 10s
      timeout: 5s
      retries: 5
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # ==========================================================================
  # Object storage services
  # ==========================================================================

  # MinIO - object storage
  minio:
    image: minio/minio:latest
    container_name: testmaster-minio
    hostname: minio
    restart: unless-stopped
    command: server /data --console-address ":9001"
    environment:
      MINIO_ROOT_USER: testmaster
      MINIO_ROOT_PASSWORD: testmaster_password_2024
    ports:
      - "9000:9000"  # API
      - "9001:9001"  # Console
    volumes:
      - minio-data:/data
    networks:
      - testmaster-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
      interval: 10s
      timeout: 5s
      retries: 5
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # MinIO initialization (creates buckets)
  minio-init:
    image: minio/mc:latest
    container_name: testmaster-minio-init
    depends_on:
      - minio
    entrypoint: >
      /bin/sh -c "
      sleep 5;
      /usr/bin/mc config host add myminio http://minio:9000 testmaster testmaster_password_2024;
      /usr/bin/mc mb myminio/test-reports --ignore-existing;
      /usr/bin/mc mb myminio/test-recordings --ignore-existing;
      /usr/bin/mc mb myminio/test-screenshots --ignore-existing;
      /usr/bin/mc anonymous set download myminio/test-reports;
      /usr/bin/mc anonymous set download myminio/test-recordings;
      /usr/bin/mc anonymous set download myminio/test-screenshots;
      exit 0;
      "
    networks:
      - testmaster-network

  # ==========================================================================
  # Selenium Grid services
  # ==========================================================================

  # Selenium Hub
  selenium-hub:
    image: selenium/hub:4.15.0
    container_name: testmaster-selenium-hub
    hostname: selenium-hub
    restart: unless-stopped
    ports:
      - "4444:4444"  # Selenium Grid
      - "4442:4442"  # Event Bus publish
      - "4443:4443"  # Event Bus subscribe
    environment:
      SE_SESSION_REQUEST_TIMEOUT: 300
      SE_SESSION_RETRY_INTERVAL: 5
      SE_HEALTHCHECK_INTERVAL: 10
    networks:
      - testmaster-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:4444/wd/hub/status"]
      interval: 10s
      timeout: 5s
      retries: 5
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # Chrome node
  selenium-chrome:
    image: selenium/node-chrome:4.15.0
    container_name: testmaster-selenium-chrome
    hostname: selenium-chrome
    restart: unless-stopped
    depends_on:
      - selenium-hub
    environment:
      SE_EVENT_BUS_HOST: selenium-hub
      SE_EVENT_BUS_PUBLISH_PORT: 4442
      SE_EVENT_BUS_SUBSCRIBE_PORT: 4443
      SE_NODE_MAX_SESSIONS: 5
      SE_NODE_SESSION_TIMEOUT: 300
      SE_VNC_NO_PASSWORD: 1
    ports:
      - "7900:7900"  # VNC
    volumes:
      - /dev/shm:/dev/shm
      - test-recordings:/recordings
    networks:
      - testmaster-network
    shm_size: 2gb
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # Firefox node
  selenium-firefox:
    image: selenium/node-firefox:4.15.0
    container_name: testmaster-selenium-firefox
    hostname: selenium-firefox
    restart: unless-stopped
    depends_on:
      - selenium-hub
    environment:
      SE_EVENT_BUS_HOST: selenium-hub
      SE_EVENT_BUS_PUBLISH_PORT: 4442
      SE_EVENT_BUS_SUBSCRIBE_PORT: 4443
      SE_NODE_MAX_SESSIONS: 5
      SE_NODE_SESSION_TIMEOUT: 300
      SE_VNC_NO_PASSWORD: 1
    ports:
      - "7901:7900"  # VNC
    volumes:
      - /dev/shm:/dev/shm
      - test-recordings:/recordings
    networks:
      - testmaster-network
    shm_size: 2gb
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # Edge node
  selenium-edge:
    image: selenium/node-edge:4.15.0
    container_name: testmaster-selenium-edge
    hostname: selenium-edge
    restart: unless-stopped
    depends_on:
      - selenium-hub
    environment:
      SE_EVENT_BUS_HOST: selenium-hub
      SE_EVENT_BUS_PUBLISH_PORT: 4442
      SE_EVENT_BUS_SUBSCRIBE_PORT: 4443
      SE_NODE_MAX_SESSIONS: 5
      SE_NODE_SESSION_TIMEOUT: 300
      SE_VNC_NO_PASSWORD: 1
    ports:
      - "7902:7900"  # VNC
    volumes:
      - /dev/shm:/dev/shm
      - test-recordings:/recordings
    networks:
      - testmaster-network
    shm_size: 2gb
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # ==========================================================================
  # Backend services
  # ==========================================================================

  # API Gateway
  gateway:
    build:
      context: .
      dockerfile: docker/gateway/Dockerfile
    container_name: testmaster-gateway
    hostname: gateway
    restart: unless-stopped
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
      rabbitmq:
        condition: service_healthy
    environment:
      NODE_ENV: development
      PORT: 3000
      # Database config
      DB_HOST: postgres
      DB_PORT: 5432
      DB_NAME: testmaster
      DB_USER: testmaster
      DB_PASSWORD: testmaster_password_2024
      # Redis config
      REDIS_HOST: redis
      REDIS_PORT: 6379
      REDIS_PASSWORD: testmaster_password_2024
      # RabbitMQ config
      RABBITMQ_HOST: rabbitmq
      RABBITMQ_PORT: 5672
      RABBITMQ_USER: testmaster
      RABBITMQ_PASSWORD: testmaster_password_2024
      RABBITMQ_VHOST: testmaster
      # JWT config
      JWT_SECRET: testmaster_jwt_secret_key_2024_change_in_production
      JWT_EXPIRES_IN: 7d
      # Service URLs
      AI_GENERATOR_URL: http://ai-generator:8001
      EXECUTOR_URL: http://executor:8002
      PERFORMANCE_URL: http://performance:8003
      # MinIO config
      MINIO_ENDPOINT: minio
      MINIO_PORT: 9000
      MINIO_ACCESS_KEY: testmaster
      MINIO_SECRET_KEY: testmaster_password_2024
      MINIO_USE_SSL: false
    ports:
      - "3000:3000"
    volumes:
      - ./backend/gateway:/app
      - /app/node_modules
    networks:
      - testmaster-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:3000/api/health"]
      interval: 10s
      timeout: 5s
      retries: 5
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # AI Generator service
  ai-generator:
    build:
      context: .
      dockerfile: docker/ai-generator/Dockerfile
    container_name: testmaster-ai-generator
    hostname: ai-generator
    restart: unless-stopped
    depends_on:
      mongodb:
        condition: service_healthy
      redis:
        condition: service_healthy
      rabbitmq:
        condition: service_healthy
    environment:
      ENVIRONMENT: development
      PORT: 8001
      # MongoDB config
      MONGODB_URI: mongodb://testmaster:testmaster_password_2024@mongodb:27017/testmaster?authSource=admin
      # Redis config
      REDIS_HOST: redis
      REDIS_PORT: 6379
      REDIS_PASSWORD: testmaster_password_2024
      # RabbitMQ config
      RABBITMQ_HOST: rabbitmq
      RABBITMQ_PORT: 5672
      RABBITMQ_USER: testmaster
      RABBITMQ_PASSWORD: testmaster_password_2024
      RABBITMQ_VHOST: testmaster
      # OpenAI config
      OPENAI_API_KEY: ${OPENAI_API_KEY}
      OPENAI_MODEL: gpt-4-turbo-preview
      OPENAI_MAX_TOKENS: 4000
      # DeepSeek config
      DEEPSEEK_API_KEY: ${DEEPSEEK_API_KEY}
      DEEPSEEK_MODEL: deepseek-coder
      # Claude config
      ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
      ANTHROPIC_MODEL: claude-3-opus-20240229
    ports:
      - "8001:8001"
    volumes:
      - ./backend/services/ai-generator:/app
    networks:
      - testmaster-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
      interval: 10s
      timeout: 5s
      retries: 5
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # Executor service
  executor:
    build:
      context: .
      dockerfile: docker/executor/Dockerfile
    container_name: testmaster-executor
    hostname: executor
    restart: unless-stopped
    depends_on:
      mongodb:
        condition: service_healthy
      redis:
        condition: service_healthy
      rabbitmq:
        condition: service_healthy
      selenium-hub:
        condition: service_healthy
    environment:
      ENVIRONMENT: development
      PORT: 8002
      # MongoDB config
      MONGODB_URI: mongodb://testmaster:testmaster_password_2024@mongodb:27017/testmaster?authSource=admin
      # Redis config
      REDIS_HOST: redis
      REDIS_PORT: 6379
      REDIS_PASSWORD: testmaster_password_2024
      # RabbitMQ config
      RABBITMQ_HOST: rabbitmq
      RABBITMQ_PORT: 5672
      RABBITMQ_USER: testmaster
      RABBITMQ_PASSWORD: testmaster_password_2024
      RABBITMQ_VHOST: testmaster
      # Selenium config
      SELENIUM_HUB_URL: http://selenium-hub:4444/wd/hub
      # MinIO config
      MINIO_ENDPOINT: minio
      MINIO_PORT: 9000
      MINIO_ACCESS_KEY: testmaster
      MINIO_SECRET_KEY: testmaster_password_2024
      MINIO_USE_SSL: false
      # Execution config
      MAX_PARALLEL_EXECUTIONS: 5
      EXECUTION_TIMEOUT: 3600
      SCREENSHOT_ON_FAILURE: true
      VIDEO_RECORDING: true
    ports:
      - "8002:8002"
    volumes:
      - ./backend/services/executor:/app
      - test-reports:/app/reports
      - test-recordings:/app/recordings
    networks:
      - testmaster-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8002/health"]
      interval: 10s
      timeout: 5s
      retries: 5
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # Performance service
  performance:
    build:
      context: .
      dockerfile: docker/performance/Dockerfile
    container_name: testmaster-performance
    hostname: performance
    restart: unless-stopped
    depends_on:
      mongodb:
        condition: service_healthy
      redis:
        condition: service_healthy
      rabbitmq:
        condition: service_healthy
    environment:
      ENVIRONMENT: development
      PORT: 8003
      # MongoDB config
      MONGODB_URI: mongodb://testmaster:testmaster_password_2024@mongodb:27017/testmaster?authSource=admin
      # Redis config
      REDIS_HOST: redis
      REDIS_PORT: 6379
      REDIS_PASSWORD: testmaster_password_2024
      # RabbitMQ config
      RABBITMQ_HOST: rabbitmq
      RABBITMQ_PORT: 5672
      RABBITMQ_USER: testmaster
      RABBITMQ_PASSWORD: testmaster_password_2024
      RABBITMQ_VHOST: testmaster
      # MinIO config
      MINIO_ENDPOINT: minio
      MINIO_PORT: 9000
      MINIO_ACCESS_KEY: testmaster
      MINIO_SECRET_KEY: testmaster_password_2024
      MINIO_USE_SSL: false
      # Performance test config
      MAX_CONCURRENT_TESTS: 3
      DEFAULT_TEST_DURATION: 300
      DEFAULT_USERS: 100
      DEFAULT_SPAWN_RATE: 10
    ports:
      - "8003:8003"
      - "8089:8089"  # Locust Web UI
    volumes:
      - ./backend/services/performance:/app
      - test-reports:/app/reports
    networks:
      - testmaster-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8003/health"]
      interval: 10s
      timeout: 5s
      retries: 5
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # ==========================================================================
  # Frontend service
  # ==========================================================================

  frontend:
    build:
      context: .
      dockerfile: docker/frontend/Dockerfile
      args:
        NODE_ENV: development
    container_name: testmaster-frontend
    hostname: frontend
    restart: unless-stopped
    depends_on:
      - gateway
    environment:
      NODE_ENV: development
      VITE_API_BASE_URL: http://localhost:3000/api
      VITE_WS_URL: ws://localhost:3000
    ports:
      - "5173:5173"
    volumes:
      - ./frontend:/app
      - /app/node_modules
    networks:
      - testmaster-network
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # ==========================================================================
  # Nginx reverse proxy
  # ==========================================================================

  nginx:
    image: nginx:alpine
    container_name: testmaster-nginx
    hostname: nginx
    restart: unless-stopped
    depends_on:
      - frontend
      - gateway
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./docker/nginx/nginx.conf:/etc/nginx/nginx.conf
      - ./docker/nginx/conf.d:/etc/nginx/conf.d
      - ./docker/nginx/ssl:/etc/nginx/ssl
    networks:
      - testmaster-network
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost/health"]
      interval: 10s
      timeout: 5s
      retries: 5
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # ==========================================================================
  # Monitoring services
  # ==========================================================================

  # Prometheus
  prometheus:
    image: prom/prometheus:latest
    container_name: testmaster-prometheus
    hostname: prometheus
    restart: unless-stopped
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    ports:
      - "9090:9090"
    volumes:
      - ./docker/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus-data:/prometheus
    networks:
      - testmaster-network
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:9090/-/healthy"]
      interval: 10s
      timeout: 5s
      retries: 5
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # Grafana
  grafana:
    image: grafana/grafana:latest
    container_name: testmaster-grafana
    hostname: grafana
    restart: unless-stopped
    depends_on:
      - prometheus
    environment:
      GF_SECURITY_ADMIN_USER: admin
      GF_SECURITY_ADMIN_PASSWORD: admin
      GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-simple-json-datasource
    ports:
      - "3001:3000"
    volumes:
      - grafana-data:/var/lib/grafana
      - ./docker/grafana/provisioning:/etc/grafana/provisioning
      - ./docker/grafana/dashboards:/var/lib/grafana/dashboards
    networks:
      - testmaster-network
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:3000/api/health"]
      interval: 10s
      timeout: 5s
      retries: 5
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # Elasticsearch
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.11.0
    container_name: testmaster-elasticsearch
    hostname: elasticsearch
    restart: unless-stopped
    environment:
      - discovery.type=single-node
      - xpack.security.enabled=false
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
    ports:
      - "9200:9200"
      - "9300:9300"
    volumes:
      - elasticsearch-data:/usr/share/elasticsearch/data
    networks:
      - testmaster-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9200/_cluster/health"]
      interval: 10s
      timeout: 5s
      retries: 5
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # Kibana
  kibana:
    image: docker.elastic.co/kibana/kibana:8.11.0
    container_name: testmaster-kibana
    hostname: kibana
    restart: unless-stopped
    depends_on:
      elasticsearch:
        condition: service_healthy
    environment:
      ELASTICSEARCH_HOSTS: http://elasticsearch:9200
    ports:
      - "5601:5601"
    networks:
      - testmaster-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:5601/api/status"]
      interval: 10s
      timeout: 5s
      retries: 5
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # Logstash
  logstash:
    image: docker.elastic.co/logstash/logstash:8.11.0
    container_name: testmaster-logstash
    hostname: logstash
    restart: unless-stopped
    depends_on:
      elasticsearch:
        condition: service_healthy
    ports:
      - "5000:5000/tcp"
      - "5000:5000/udp"
      - "9600:9600"
    volumes:
      - ./docker/logstash/pipeline:/usr/share/logstash/pipeline
      - ./docker/logstash/config/logstash.yml:/usr/share/logstash/config/logstash.yml
    networks:
      - testmaster-network
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
```
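Several services above embed a MongoDB connection string of the form `mongodb://user:password@mongodb:27017/testmaster?authSource=admin`. If you generate such URIs from environment variables instead of hard-coding them, the credentials must be percent-encoded, since a password containing `:`, `@`, or `/` would otherwise corrupt the URI. A minimal sketch (the `mongo_uri` helper and its defaults are illustrative, not part of the project):

```python
from urllib.parse import quote_plus


def mongo_uri(user, password, host="mongodb", port=27017,
              db="testmaster", auth_source="admin"):
    """Build a MongoDB connection string like the ones in docker-compose.yml.

    quote_plus guards against credentials containing ':', '@' or '/',
    which would otherwise break URI parsing.
    """
    return (f"mongodb://{quote_plus(user)}:{quote_plus(password)}"
            f"@{host}:{port}/{db}?authSource={auth_source}")
```

For the simple alphanumeric password used in the development compose file the encoding is a no-op, so the helper reproduces the exact URIs shown above.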

2.7.2 Production Configuration: docker-compose.prod.yml

```yaml
# TestMaster Automated Testing Platform - production Docker Compose configuration
# Version: 1.0.0
# Purpose: production deployment
version: '3.8'

# Inherit the base configuration
include:
  - docker-compose.yml

# ============================================================================
# Production-specific overrides
# ============================================================================
services:
  # ==========================================================================
  # Database services - production tuning
  # ==========================================================================
  postgres:
    environment:
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
    command: >
      postgres
      -c max_connections=200
      -c shared_buffers=256MB
      -c effective_cache_size=1GB
      -c maintenance_work_mem=64MB
      -c checkpoint_completion_target=0.9
      -c wal_buffers=16MB
      -c default_statistics_target=100
      -c random_page_cost=1.1
      -c effective_io_concurrency=200
      -c work_mem=2621kB
      -c min_wal_size=1GB
      -c max_wal_size=4GB
    deploy:
      resources:
        limits:
          cpus: '2'
          memory: 2G
        reservations:
          cpus: '1'
          memory: 1G

  mongodb:
    environment:
      MONGO_INITDB_ROOT_PASSWORD: ${MONGODB_PASSWORD}
    command: >
      mongod
      --wiredTigerCacheSizeGB 1.5
      --wiredTigerCollectionBlockCompressor snappy
    deploy:
      resources:
        limits:
          cpus: '2'
          memory: 2G
        reservations:
          cpus: '1'
          memory: 1G

  redis:
    command: >
      redis-server
      --requirepass ${REDIS_PASSWORD}
      --appendonly yes
      --maxmemory 512mb
      --maxmemory-policy allkeys-lru
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 512M
        reservations:
          cpus: '0.5'
          memory: 256M

  # ==========================================================================
  # Backend services - production tuning
  # ==========================================================================
  gateway:
    environment:
      NODE_ENV: production
      DB_PASSWORD: ${POSTGRES_PASSWORD}
      REDIS_PASSWORD: ${REDIS_PASSWORD}
      RABBITMQ_PASSWORD: ${RABBITMQ_PASSWORD}
      JWT_SECRET: ${JWT_SECRET}
      MINIO_SECRET_KEY: ${MINIO_SECRET_KEY}
    deploy:
      replicas: 3
      resources:
        limits:
          cpus: '1'
          memory: 1G
        reservations:
          cpus: '0.5'
          memory: 512M
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
        window: 120s
    logging:
      driver: "json-file"
      options:
        max-size: "50m"
        max-file: "5"

  ai-generator:
    environment:
      ENVIRONMENT: production
      MONGODB_URI: mongodb://testmaster:${MONGODB_PASSWORD}@mongodb:27017/testmaster?authSource=admin
      REDIS_PASSWORD: ${REDIS_PASSWORD}
      RABBITMQ_PASSWORD: ${RABBITMQ_PASSWORD}
      OPENAI_API_KEY: ${OPENAI_API_KEY}
      DEEPSEEK_API_KEY: ${DEEPSEEK_API_KEY}
      ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
    deploy:
      replicas: 2
      resources:
        limits:
          cpus: '2'
          memory: 2G
        reservations:
          cpus: '1'
          memory: 1G
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
        window: 120s

  executor:
    environment:
      ENVIRONMENT: production
      MONGODB_URI: mongodb://testmaster:${MONGODB_PASSWORD}@mongodb:27017/testmaster?authSource=admin
      REDIS_PASSWORD: ${REDIS_PASSWORD}
      RABBITMQ_PASSWORD: ${RABBITMQ_PASSWORD}
      MINIO_SECRET_KEY: ${MINIO_SECRET_KEY}
      MAX_PARALLEL_EXECUTIONS: 10
    deploy:
      replicas: 3
      resources:
        limits:
          cpus: '2'
          memory: 2G
        reservations:
          cpus: '1'
          memory: 1G
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
        window: 120s

  performance:
    environment:
      ENVIRONMENT: production
      MONGODB_URI: mongodb://testmaster:${MONGODB_PASSWORD}@mongodb:27017/testmaster?authSource=admin
      REDIS_PASSWORD: ${REDIS_PASSWORD}
      RABBITMQ_PASSWORD: ${RABBITMQ_PASSWORD}
      MINIO_SECRET_KEY: ${MINIO_SECRET_KEY}
      MAX_CONCURRENT_TESTS: 5
    deploy:
      replicas: 2
      resources:
        limits:
          cpus: '2'
          memory: 2G
        reservations:
          cpus: '1'
          memory: 1G
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
        window: 120s

  # ==========================================================================
  # Selenium Grid - production scale-out
  # ==========================================================================
  selenium-chrome:
    environment:
      SE_NODE_MAX_SESSIONS: 10
    deploy:
      replicas: 3
      resources:
        limits:
          cpus: '2'
          memory: 4G
        reservations:
          cpus: '1'
          memory: 2G

  selenium-firefox:
    environment:
      SE_NODE_MAX_SESSIONS: 10
    deploy:
      replicas: 2
      resources:
        limits:
          cpus: '2'
          memory: 4G
        reservations:
          cpus: '1'
          memory: 2G

  selenium-edge:
    environment:
      SE_NODE_MAX_SESSIONS: 10
    deploy:
      replicas: 2
      resources:
        limits:
          cpus: '2'
          memory: 4G
        reservations:
          cpus: '1'
          memory: 2G

  # ==========================================================================
  # Nginx - production configuration
  # ==========================================================================
  nginx:
    volumes:
      - ./docker/nginx/nginx.prod.conf:/etc/nginx/nginx.conf
      - ./docker/nginx/conf.d:/etc/nginx/conf.d
      - ./docker/nginx/ssl:/etc/nginx/ssl
      - /var/log/nginx:/var/log/nginx
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 512M
        reservations:
          cpus: '0.5'
          memory: 256M
```
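When an override file like this one is combined with the base file (for example via `docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d`), Compose merges mappings key by key, while scalar values from the later file win. The sketch below is a deliberately simplified model of that behaviour for intuition only; real Compose has extra rules (some list-valued keys such as `ports` and `volumes` are appended or deduplicated rather than replaced), so treat this as an approximation.

```python
def merge(base, override):
    """Simplified sketch of Compose-style file merging.

    Mappings merge recursively key by key; scalars and lists from the
    override replace the base value. (Real Compose additionally appends
    or deduplicates certain list-valued keys.)
    """
    if isinstance(base, dict) and isinstance(override, dict):
        out = dict(base)
        for key, value in override.items():
            out[key] = merge(base[key], value) if key in base else value
        return out
    return override
```

This is why the production file only needs to state the keys it changes: `redis` keeps its `image` from the base file while its `command` and new `deploy` section come from the override.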

Continued in the next part...

2.7.3 Development Configuration: docker-compose.dev.yml

```yaml
# TestMaster Automated Testing Platform - development Docker Compose configuration
# Version: 1.0.0
# Purpose: local development environment
version: '3.8'

# Inherit the base configuration
include:
  - docker-compose.yml

# ============================================================================
# Development-specific overrides
# ============================================================================
services:
  # ==========================================================================
  # Backend services - dev mode (hot reload)
  # ==========================================================================
  gateway:
    command: npm run dev
    environment:
      NODE_ENV: development
      DEBUG: testmaster:*
    volumes:
      - ./backend/gateway:/app
      - /app/node_modules
    stdin_open: true
    tty: true

  ai-generator:
    command: python -m uvicorn src.main:app --host 0.0.0.0 --port 8001 --reload
    environment:
      ENVIRONMENT: development
      LOG_LEVEL: DEBUG
    volumes:
      - ./backend/services/ai-generator:/app
    stdin_open: true
    tty: true

  executor:
    command: python -m uvicorn src.main:app --host 0.0.0.0 --port 8002 --reload
    environment:
      ENVIRONMENT: development
      LOG_LEVEL: DEBUG
    volumes:
      - ./backend/services/executor:/app
    stdin_open: true
    tty: true

  performance:
    command: python -m uvicorn src.main:app --host 0.0.0.0 --port 8003 --reload
    environment:
      ENVIRONMENT: development
      LOG_LEVEL: DEBUG
    volumes:
      - ./backend/services/performance:/app
    stdin_open: true
    tty: true

  # ==========================================================================
  # Frontend - dev mode
  # ==========================================================================
  frontend:
    command: npm run dev
    environment:
      NODE_ENV: development
    volumes:
      - ./frontend:/app
      - /app/node_modules
    stdin_open: true
    tty: true

  # ==========================================================================
  # Development tools
  # ==========================================================================

  # Adminer - database admin UI
  adminer:
    image: adminer:latest
    container_name: testmaster-adminer
    hostname: adminer
    restart: unless-stopped
    ports:
      - "8080:8080"
    networks:
      - testmaster-network
    environment:
      ADMINER_DEFAULT_SERVER: postgres

  # Mongo Express - MongoDB admin UI
  mongo-express:
    image: mongo-express:latest
    container_name: testmaster-mongo-express
    hostname: mongo-express
    restart: unless-stopped
    depends_on:
      - mongodb
    ports:
      - "8081:8081"
    networks:
      - testmaster-network
    environment:
      ME_CONFIG_MONGODB_ADMINUSERNAME: testmaster
      ME_CONFIG_MONGODB_ADMINPASSWORD: testmaster_password_2024
      ME_CONFIG_MONGODB_URL: mongodb://testmaster:testmaster_password_2024@mongodb:27017/
      ME_CONFIG_BASICAUTH_USERNAME: admin
      ME_CONFIG_BASICAUTH_PASSWORD: admin

  # Redis Commander - Redis admin UI
  redis-commander:
    image: rediscommander/redis-commander:latest
    container_name: testmaster-redis-commander
    hostname: redis-commander
    restart: unless-stopped
    depends_on:
      - redis
    ports:
      - "8082:8081"
    networks:
      - testmaster-network
    environment:
      REDIS_HOSTS: local:redis:6379:0:testmaster_password_2024
```

2.7.4 Test Configuration: docker-compose.test.yml

```yaml
# TestMaster Automated Testing Platform - test Docker Compose configuration
# Version: 1.0.0
# Purpose: automated test environment
version: '3.8'

# Inherit the base configuration
include:
  - docker-compose.yml

# ============================================================================
# Test-specific overrides
# ============================================================================
services:
  # ==========================================================================
  # Test databases - in-memory storage (tmpfs)
  # ==========================================================================
  postgres:
    tmpfs:
      - /var/lib/postgresql/data
    environment:
      POSTGRES_DB: testmaster_test

  mongodb:
    tmpfs:
      - /data/db
    environment:
      MONGO_INITDB_DATABASE: testmaster_test

  redis:
    tmpfs:
      - /data

  # ==========================================================================
  # Test service configuration
  # ==========================================================================
  gateway:
    environment:
      NODE_ENV: test
      DB_NAME: testmaster_test
    command: npm run test:e2e

  ai-generator:
    environment:
      ENVIRONMENT: test
      MONGODB_URI: mongodb://testmaster:testmaster_password_2024@mongodb:27017/testmaster_test?authSource=admin
    command: pytest tests/ -v --cov=src

  executor:
    environment:
      ENVIRONMENT: test
      MONGODB_URI: mongodb://testmaster:testmaster_password_2024@mongodb:27017/testmaster_test?authSource=admin
    command: pytest tests/ -v --cov=src

  performance:
    environment:
      ENVIRONMENT: test
      MONGODB_URI: mongodb://testmaster:testmaster_password_2024@mongodb:27017/testmaster_test?authSource=admin
    command: pytest tests/ -v --cov=src
```

2.7.5 Environment Variable Template: .env.example

# TestMaster Automated Testing Platform - Environment variable example
# Copy this file to .env and fill in real values

# ============================================================================
# Environment
# ============================================================================
NODE_ENV=development
ENVIRONMENT=development

# ============================================================================
# Databases
# ============================================================================
# PostgreSQL
POSTGRES_HOST=postgres
POSTGRES_PORT=5432
POSTGRES_DB=testmaster
POSTGRES_USER=testmaster
POSTGRES_PASSWORD=your_secure_postgres_password_here

# MongoDB
MONGODB_HOST=mongodb
MONGODB_PORT=27017
MONGODB_USER=testmaster
MONGODB_PASSWORD=your_secure_mongodb_password_here
MONGODB_DATABASE=testmaster

# Redis
REDIS_HOST=redis
REDIS_PORT=6379
REDIS_PASSWORD=your_secure_redis_password_here

# ============================================================================
# Message queue
# ============================================================================
# RabbitMQ
RABBITMQ_HOST=rabbitmq
RABBITMQ_PORT=5672
RABBITMQ_USER=testmaster
RABBITMQ_PASSWORD=your_secure_rabbitmq_password_here
RABBITMQ_VHOST=testmaster

# ============================================================================
# Object storage
# ============================================================================
# MinIO
MINIO_ENDPOINT=minio
MINIO_PORT=9000
MINIO_ACCESS_KEY=testmaster
MINIO_SECRET_KEY=your_secure_minio_secret_key_here
MINIO_USE_SSL=false

# ============================================================================
# Security
# ============================================================================
# JWT
JWT_SECRET=your_secure_jwt_secret_key_here_at_least_32_characters
JWT_EXPIRES_IN=7d

# Encryption key
ENCRYPTION_KEY=your_secure_encryption_key_here_32_characters

# ============================================================================
# AI services
# ============================================================================
# OpenAI
OPENAI_API_KEY=sk-your-openai-api-key-here
OPENAI_MODEL=gpt-4-turbo-preview
OPENAI_MAX_TOKENS=4000
OPENAI_TEMPERATURE=0.7

# DeepSeek
DEEPSEEK_API_KEY=your-deepseek-api-key-here
DEEPSEEK_MODEL=deepseek-coder
DEEPSEEK_BASE_URL=https://api.deepseek.com

# Anthropic Claude
ANTHROPIC_API_KEY=sk-ant-your-anthropic-api-key-here
ANTHROPIC_MODEL=claude-3-opus-20240229
ANTHROPIC_MAX_TOKENS=4000

# ============================================================================
# Selenium Grid
# ============================================================================
SELENIUM_HUB_URL=http://selenium-hub:4444/wd/hub
SELENIUM_IMPLICIT_WAIT=10
SELENIUM_PAGE_LOAD_TIMEOUT=30
SELENIUM_SCRIPT_TIMEOUT=30

# ============================================================================
# Executor
# ============================================================================
MAX_PARALLEL_EXECUTIONS=5
EXECUTION_TIMEOUT=3600
SCREENSHOT_ON_FAILURE=true
VIDEO_RECORDING=true
RETRY_FAILED_TESTS=true
MAX_RETRY_ATTEMPTS=2

# ============================================================================
# Performance testing
# ============================================================================
MAX_CONCURRENT_TESTS=3
DEFAULT_TEST_DURATION=300
DEFAULT_USERS=100
DEFAULT_SPAWN_RATE=10
LOCUST_WEB_PORT=8089

# ============================================================================
# Monitoring
# ============================================================================
# Prometheus
PROMETHEUS_PORT=9090

# Grafana
GRAFANA_PORT=3001
GRAFANA_ADMIN_USER=admin
GRAFANA_ADMIN_PASSWORD=admin

# Elasticsearch
ELASTICSEARCH_HOST=elasticsearch
ELASTICSEARCH_PORT=9200

# Kibana
KIBANA_PORT=5601

# ============================================================================
# Logging
# ============================================================================
LOG_LEVEL=info
LOG_FORMAT=json
LOG_MAX_SIZE=10m
LOG_MAX_FILES=3

# ============================================================================
# Mail (SMTP)
# ============================================================================
SMTP_HOST=smtp.gmail.com
SMTP_PORT=587
SMTP_SECURE=false
SMTP_USER=your-email@gmail.com
SMTP_PASSWORD=your-email-password
SMTP_FROM=TestMaster <noreply@testmaster.com>

# ============================================================================
# Webhooks
# ============================================================================
SLACK_WEBHOOK_URL=https://hooks.slack.com/services/YOUR/WEBHOOK/URL
TEAMS_WEBHOOK_URL=https://outlook.office.com/webhook/YOUR/WEBHOOK/URL

# ============================================================================
# Miscellaneous
# ============================================================================
# Application
APP_NAME=TestMaster
APP_VERSION=1.0.0
APP_URL=http://localhost

# API
API_PORT=3000
API_PREFIX=/api
API_RATE_LIMIT=100

# Frontend
FRONTEND_PORT=5173
VITE_API_BASE_URL=http://localhost:3000/api
VITE_WS_URL=ws://localhost:3000

# Nginx
NGINX_HTTP_PORT=80
NGINX_HTTPS_PORT=443
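Several of these values are placeholders that must be replaced before startup. A small sketch of a startup guard that refuses to run while placeholders remain; `require_env` is a hypothetical helper, not part of the project:

```shell
#!/bin/sh
# Hypothetical helper (not in the project): fail fast when a variable
# from .env is unset, empty, or still one of the placeholder values.
require_env() {
    name=$1
    eval "value=\${$name:-}"
    case "$value" in
        ""|your_*|your-*|sk-your-*|sk-ant-your-*)
            echo "error: $name is unset or still a placeholder" >&2
            return 1
            ;;
    esac
}

# Example: a real value passes, a placeholder fails.
POSTGRES_PASSWORD="s3cret"
require_env POSTGRES_PASSWORD && echo "POSTGRES_PASSWORD looks set"

OPENAI_API_KEY="sk-your-openai-api-key-here"
require_env OPENAI_API_KEY || echo "OPENAI_API_KEY needs a real key"
```

Calling such a guard for each mandatory variable at the top of the start script turns a confusing runtime failure into an immediate, named error.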

2.7.6 Quick Start Script (scripts/docker-start.sh)

#!/bin/bash
# TestMaster Automated Testing Platform - Docker quick start script
# Version: 1.0.0

set -e

# Color output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Print a colored message
print_message() {
    local color=$1
    local message=$2
    echo -e "${color}${message}${NC}"
}

# Print a section header
print_header() {
    echo ""
    echo "============================================================================"
    echo "  $1"
    echo "============================================================================"
    echo ""
}

# Check that Docker is installed
check_docker() {
    if ! command -v docker &> /dev/null; then
        print_message $RED "❌ Docker is not installed; please install Docker first"
        exit 1
    fi
    if ! command -v docker-compose &> /dev/null; then
        print_message $RED "❌ Docker Compose is not installed; please install Docker Compose first"
        exit 1
    fi
    print_message $GREEN "✅ Docker and Docker Compose are installed"
}

# Check the environment file
check_env_file() {
    if [ ! -f .env ]; then
        print_message $YELLOW "⚠️ .env not found; creating it from .env.example..."
        cp .env.example .env
        print_message $GREEN "✅ Created .env; adjust the values as needed"
    else
        print_message $GREEN "✅ Found .env"
    fi
}

# Create required directories
create_directories() {
    print_message $YELLOW "📁 Creating required directories..."
    mkdir -p docker/postgres/init
    mkdir -p docker/mongodb/init
    mkdir -p docker/nginx/conf.d
    mkdir -p docker/nginx/ssl
    mkdir -p docker/prometheus
    mkdir -p docker/grafana/provisioning/datasources
    mkdir -p docker/grafana/provisioning/dashboards
    mkdir -p docker/grafana/dashboards
    mkdir -p docker/logstash/pipeline
    mkdir -p docker/logstash/config
    print_message $GREEN "✅ Directories created"
}

# Stop and remove existing containers
cleanup() {
    print_message $YELLOW "🧹 Cleaning up existing containers..."
    docker-compose down -v
    print_message $GREEN "✅ Cleanup complete"
}

# Build images
build_images() {
    print_message $YELLOW "🏗️ Building Docker images..."
    docker-compose build --no-cache
    print_message $GREEN "✅ Image build complete"
}

# Start services
start_services() {
    local env=$1
    local compose_file="docker-compose.yml"
    case $env in
        dev)
            compose_file="docker-compose.yml:docker-compose.dev.yml"
            ;;
        test)
            compose_file="docker-compose.yml:docker-compose.test.yml"
            ;;
        prod)
            compose_file="docker-compose.yml:docker-compose.prod.yml"
            ;;
    esac
    print_message $YELLOW "🚀 Starting services (environment: $env)..."
    COMPOSE_FILE=$compose_file docker-compose up -d
    print_message $GREEN "✅ Services started"
}

# Wait until services are ready
wait_for_services() {
    print_message $YELLOW "⏳ Waiting for services to become ready..."
    local max_attempts=60
    local attempt=0
    while [ $attempt -lt $max_attempts ]; do
        if curl -f http://localhost:3000/api/health &> /dev/null; then
            print_message $GREEN "✅ All services are ready"
            return 0
        fi
        attempt=$((attempt + 1))
        echo -n "."
        sleep 2
    done
    print_message $RED "❌ Timed out waiting for services"
    return 1
}

# Show service status
show_status() {
    print_header "Service status"
    docker-compose ps
}

# Show access information
show_access_info() {
    print_header "Access information"
    echo "🌐 Frontend:      http://localhost:5173"
    echo "🔌 API Gateway:   http://localhost:3000"
    echo "🤖 AI Generator:  http://localhost:8001"
    echo "▶️ Executor:      http://localhost:8002"
    echo "⚡ Performance:   http://localhost:8003"
    echo ""
    echo "📊 Monitoring:"
    echo "   - Prometheus:  http://localhost:9090"
    echo "   - Grafana:     http://localhost:3001 (admin/admin)"
    echo "   - Kibana:      http://localhost:5601"
    echo ""
    echo "🔧 Admin tools:"
    echo "   - RabbitMQ:        http://localhost:15672 (testmaster/testmaster_password_2024)"
    echo "   - MinIO:           http://localhost:9001 (testmaster/testmaster_password_2024)"
    echo "   - Selenium Grid:   http://localhost:4444"
    echo "   - Adminer:         http://localhost:8080"
    echo "   - Mongo Express:   http://localhost:8081 (admin/admin)"
    echo "   - Redis Commander: http://localhost:8082"
    echo ""
}

# Show logs
show_logs() {
    local service=$1
    if [ -z "$service" ]; then
        docker-compose logs -f
    else
        docker-compose logs -f $service
    fi
}

# Main entry point
main() {
    print_header "TestMaster Automated Testing Platform - Docker startup script"

    # Parse arguments
    local command=${1:-start}
    local env=${2:-dev}

    case $command in
        start)
            check_docker
            check_env_file
            create_directories
            build_images
            start_services $env
            wait_for_services
            show_status
            show_access_info
            ;;
        stop)
            print_message $YELLOW "🛑 Stopping services..."
            docker-compose down
            print_message $GREEN "✅ Services stopped"
            ;;
        restart)
            print_message $YELLOW "🔄 Restarting services..."
            docker-compose restart
            print_message $GREEN "✅ Services restarted"
            ;;
        clean)
            cleanup
            ;;
        logs)
            show_logs $env
            ;;
        status)
            show_status
            ;;
        *)
            echo "Usage: $0 {start|stop|restart|clean|logs|status} [dev|test|prod]"
            echo ""
            echo "Commands:"
            echo "  start   - start all services"
            echo "  stop    - stop all services"
            echo "  restart - restart all services"
            echo "  clean   - remove all containers and data"
            echo "  logs    - tail logs"
            echo "  status  - show service status"
            echo ""
            echo "Environments:"
            echo "  dev  - development (default)"
            echo "  test - testing"
            echo "  prod - production"
            exit 1
            ;;
    esac
}

# Run main
main "$@"
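The environment selection in `start_services` works by layering Compose files through the `COMPOSE_FILE` variable: entries are separated by `COMPOSE_PATH_SEPARATOR` (a colon by default), and settings in later files override earlier ones. A minimal sketch of how such a value splits; Compose performs this internally:

```shell
#!/bin/sh
# Sketch: splitting a colon-separated COMPOSE_FILE value, the same
# layering mechanism start_services() relies on. Later files override
# earlier ones when Compose merges them.
COMPOSE_FILE="docker-compose.yml:docker-compose.dev.yml"
sep="${COMPOSE_PATH_SEPARATOR:-:}"

# Split on the separator into positional parameters.
old_ifs=$IFS
IFS=$sep
set -- $COMPOSE_FILE
IFS=$old_ifs

base_file=$1
override_file=$2
echo "base file:     $base_file"
echo "override file: $override_file"
```

This is why `docker-compose.dev.yml` only needs to contain the settings that differ from the base file, not a full copy of it.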

2.7.7 Health Check Script (scripts/health-check.sh)

#!/bin/bash
# TestMaster Automated Testing Platform - Health check script
# Version: 1.0.0

set -e

# Color output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Check an HTTP health endpoint
check_service() {
    local service_name=$1
    local health_url=$2
    local max_attempts=30
    local attempt=0
    echo -n "Checking $service_name... "
    while [ $attempt -lt $max_attempts ]; do
        if curl -f -s $health_url > /dev/null 2>&1; then
            echo -e "${GREEN}✅ healthy${NC}"
            return 0
        fi
        attempt=$((attempt + 1))
        sleep 2
    done
    echo -e "${RED}❌ unhealthy${NC}"
    return 1
}

# Check a raw TCP port. PostgreSQL, MongoDB, and Redis do not speak
# HTTP, so a curl-based check would always report them as unhealthy.
check_tcp() {
    local service_name=$1
    local host=$2
    local port=$3
    echo -n "Checking $service_name... "
    if timeout 5 bash -c "cat < /dev/null > /dev/tcp/$host/$port" 2>/dev/null; then
        echo -e "${GREEN}✅ healthy${NC}"
        return 0
    fi
    echo -e "${RED}❌ unhealthy${NC}"
    return 1
}

# Main entry point
main() {
    echo "============================================================================"
    echo "  TestMaster health check"
    echo "============================================================================"
    echo ""

    local all_healthy=true

    # Core services
    check_service "API Gateway"  "http://localhost:3000/api/health" || all_healthy=false
    check_service "AI Generator" "http://localhost:8001/health"     || all_healthy=false
    check_service "Executor"     "http://localhost:8002/health"     || all_healthy=false
    check_service "Performance"  "http://localhost:8003/health"     || all_healthy=false

    # Databases (plain TCP checks)
    check_tcp "PostgreSQL" localhost 5432  || all_healthy=false
    check_tcp "MongoDB"    localhost 27017 || all_healthy=false
    check_tcp "Redis"      localhost 6379  || all_healthy=false

    # Other services
    check_service "RabbitMQ"     "http://localhost:15672"                  || all_healthy=false
    check_service "MinIO"        "http://localhost:9000/minio/health/live" || all_healthy=false
    check_service "Selenium Hub" "http://localhost:4444/wd/hub/status"     || all_healthy=false

    # Monitoring services
    check_service "Prometheus" "http://localhost:9090/-/healthy"  || all_healthy=false
    check_service "Grafana"    "http://localhost:3001/api/health" || all_healthy=false

    echo ""
    if [ "$all_healthy" = true ]; then
        echo -e "${GREEN}✅ All services are healthy${NC}"
        exit 0
    else
        echo -e "${RED}❌ Some services are unhealthy${NC}"
        exit 1
    fi
}

main "$@"

TestMaster Automated Testing Platform - Part 8: Complete Kubernetes Deployment Configuration

2.8 Kubernetes Deployment Configuration

2.8.1 Namespace Configuration (k8s/namespace.yaml)

# TestMaster Automated Testing Platform - Namespace
# Version: 1.0.0
apiVersion: v1
kind: Namespace
metadata:
  name: testmaster
  labels:
    name: testmaster
    environment: production
    app: testmaster
    version: "1.0.0"
  annotations:
    description: "TestMaster automated testing platform, production environment"
    contact: "devops@testmaster.com"

2.8.2 ConfigMap Configuration

2.8.2.1 Application Configuration (k8s/configmaps/app-config.yaml)
# TestMaster Automated Testing Platform - Application ConfigMap
# Version: 1.0.0
apiVersion: v1
kind: ConfigMap
metadata:
  name: testmaster-app-config
  namespace: testmaster
  labels:
    app: testmaster
    component: config
data:
  # Application
  APP_NAME: "TestMaster"
  APP_VERSION: "1.0.0"
  NODE_ENV: "production"
  ENVIRONMENT: "production"
  # API
  API_PORT: "3000"
  API_PREFIX: "/api"
  API_RATE_LIMIT: "100"
  # Databases
  DB_HOST: "testmaster-postgres"
  DB_PORT: "5432"
  DB_NAME: "testmaster"
  DB_USER: "testmaster"
  MONGODB_HOST: "testmaster-mongodb"
  MONGODB_PORT: "27017"
  MONGODB_DATABASE: "testmaster"
  MONGODB_USER: "testmaster"
  REDIS_HOST: "testmaster-redis"
  REDIS_PORT: "6379"
  # Message queue
  RABBITMQ_HOST: "testmaster-rabbitmq"
  RABBITMQ_PORT: "5672"
  RABBITMQ_USER: "testmaster"
  RABBITMQ_VHOST: "testmaster"
  # Object storage
  MINIO_ENDPOINT: "testmaster-minio"
  MINIO_PORT: "9000"
  MINIO_ACCESS_KEY: "testmaster"
  MINIO_USE_SSL: "false"
  # Selenium Grid
  SELENIUM_HUB_URL: "http://testmaster-selenium-hub:4444/wd/hub"
  SELENIUM_IMPLICIT_WAIT: "10"
  SELENIUM_PAGE_LOAD_TIMEOUT: "30"
  SELENIUM_SCRIPT_TIMEOUT: "30"
  # Executor
  MAX_PARALLEL_EXECUTIONS: "10"
  EXECUTION_TIMEOUT: "3600"
  SCREENSHOT_ON_FAILURE: "true"
  VIDEO_RECORDING: "true"
  RETRY_FAILED_TESTS: "true"
  MAX_RETRY_ATTEMPTS: "2"
  # Performance testing
  MAX_CONCURRENT_TESTS: "5"
  DEFAULT_TEST_DURATION: "300"
  DEFAULT_USERS: "100"
  DEFAULT_SPAWN_RATE: "10"
  LOCUST_WEB_PORT: "8089"
  # Logging
  LOG_LEVEL: "info"
  LOG_FORMAT: "json"
  # AI services
  OPENAI_MODEL: "gpt-4-turbo-preview"
  OPENAI_MAX_TOKENS: "4000"
  OPENAI_TEMPERATURE: "0.7"
  DEEPSEEK_MODEL: "deepseek-coder"
  DEEPSEEK_BASE_URL: "https://api.deepseek.com"
  ANTHROPIC_MODEL: "claude-3-opus-20240229"
  ANTHROPIC_MAX_TOKENS: "4000"
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: testmaster-nginx-config
  namespace: testmaster
  labels:
    app: testmaster
    component: nginx
data:
  nginx.conf: |
    user nginx;
    worker_processes auto;
    error_log /var/log/nginx/error.log warn;
    pid /var/run/nginx.pid;

    events {
        worker_connections 4096;
        use epoll;
        multi_accept on;
    }

    http {
        include /etc/nginx/mime.types;
        default_type application/octet-stream;

        log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                        '$status $body_bytes_sent "$http_referer" '
                        '"$http_user_agent" "$http_x_forwarded_for"';
        access_log /var/log/nginx/access.log main;

        sendfile on;
        tcp_nopush on;
        tcp_nodelay on;
        keepalive_timeout 65;
        types_hash_max_size 2048;
        client_max_body_size 100M;

        gzip on;
        gzip_vary on;
        gzip_proxied any;
        gzip_comp_level 6;
        gzip_types text/plain text/css text/xml text/javascript application/json application/javascript application/xml+rss application/rss+xml font/truetype font/opentype application/vnd.ms-fontobject image/svg+xml;

        upstream gateway {
            least_conn;
            server testmaster-gateway:3000 max_fails=3 fail_timeout=30s;
        }

        upstream frontend {
            least_conn;
            server testmaster-frontend:5173 max_fails=3 fail_timeout=30s;
        }

        server {
            listen 80;
            server_name _;

            location /api {
                proxy_pass http://gateway;
                proxy_http_version 1.1;
                proxy_set_header Upgrade $http_upgrade;
                proxy_set_header Connection 'upgrade';
                proxy_set_header Host $host;
                proxy_set_header X-Real-IP $remote_addr;
                proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
                proxy_set_header X-Forwarded-Proto $scheme;
                proxy_cache_bypass $http_upgrade;
                proxy_read_timeout 300s;
                proxy_connect_timeout 75s;
            }

            location /ws {
                proxy_pass http://gateway;
                proxy_http_version 1.1;
                proxy_set_header Upgrade $http_upgrade;
                proxy_set_header Connection "upgrade";
                proxy_set_header Host $host;
                proxy_set_header X-Real-IP $remote_addr;
                proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
                proxy_set_header X-Forwarded-Proto $scheme;
            }

            location / {
                proxy_pass http://frontend;
                proxy_http_version 1.1;
                proxy_set_header Upgrade $http_upgrade;
                proxy_set_header Connection 'upgrade';
                proxy_set_header Host $host;
                proxy_cache_bypass $http_upgrade;
            }

            location /health {
                access_log off;
                return 200 "healthy\n";
                add_header Content-Type text/plain;
            }
        }
    }
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: testmaster-prometheus-config
  namespace: testmaster
  labels:
    app: testmaster
    component: prometheus
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      evaluation_interval: 15s
      external_labels:
        cluster: 'testmaster-k8s'
        environment: 'production'

    scrape_configs:
      - job_name: 'prometheus'
        static_configs:
          - targets: ['localhost:9090']

      - job_name: 'kubernetes-apiservers'
        kubernetes_sd_configs:
          - role: endpoints
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            action: keep
            regex: default;kubernetes;https

      - job_name: 'kubernetes-nodes'
        kubernetes_sd_configs:
          - role: node
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)

      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
            action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name

      - job_name: 'testmaster-gateway'
        static_configs:
          - targets: ['testmaster-gateway:3000']
            labels:
              service: 'gateway'

      - job_name: 'testmaster-ai-generator'
        static_configs:
          - targets: ['testmaster-ai-generator:8001']
            labels:
              service: 'ai-generator'

      - job_name: 'testmaster-executor'
        static_configs:
          - targets: ['testmaster-executor:8002']
            labels:
              service: 'executor'

      - job_name: 'testmaster-performance'
        static_configs:
          - targets: ['testmaster-performance:8003']
            labels:
              service: 'performance'

2.8.3 Secret Configuration (k8s/secrets/secrets.yaml)

# TestMaster Automated Testing Platform - Secrets
# Version: 1.0.0
# Note: in production, use Sealed Secrets or an external secret manager
apiVersion: v1
kind: Secret
metadata:
  name: testmaster-db-secrets
  namespace: testmaster
  labels:
    app: testmaster
    component: database
type: Opaque
stringData:
  POSTGRES_PASSWORD: "your_secure_postgres_password_here"
  MONGODB_PASSWORD: "your_secure_mongodb_password_here"
  REDIS_PASSWORD: "your_secure_redis_password_here"
---
apiVersion: v1
kind: Secret
metadata:
  name: testmaster-mq-secrets
  namespace: testmaster
  labels:
    app: testmaster
    component: messagequeue
type: Opaque
stringData:
  RABBITMQ_PASSWORD: "your_secure_rabbitmq_password_here"
---
apiVersion: v1
kind: Secret
metadata:
  name: testmaster-storage-secrets
  namespace: testmaster
  labels:
    app: testmaster
    component: storage
type: Opaque
stringData:
  MINIO_SECRET_KEY: "your_secure_minio_secret_key_here"
---
apiVersion: v1
kind: Secret
metadata:
  name: testmaster-app-secrets
  namespace: testmaster
  labels:
    app: testmaster
    component: application
type: Opaque
stringData:
  JWT_SECRET: "your_secure_jwt_secret_key_here_at_least_32_characters"
  ENCRYPTION_KEY: "your_secure_encryption_key_here_32_characters"
---
apiVersion: v1
kind: Secret
metadata:
  name: testmaster-ai-secrets
  namespace: testmaster
  labels:
    app: testmaster
    component: ai
type: Opaque
stringData:
  OPENAI_API_KEY: "sk-your-openai-api-key-here"
  DEEPSEEK_API_KEY: "your-deepseek-api-key-here"
  ANTHROPIC_API_KEY: "sk-ant-your-anthropic-api-key-here"
---
apiVersion: v1
kind: Secret
metadata:
  name: testmaster-smtp-secrets
  namespace: testmaster
  labels:
    app: testmaster
    component: notification
type: Opaque
stringData:
  SMTP_USER: "your-email@gmail.com"
  SMTP_PASSWORD: "your-email-password"
  SLACK_WEBHOOK_URL: "https://hooks.slack.com/services/YOUR/WEBHOOK/URL"
  TEAMS_WEBHOOK_URL: "https://outlook.office.com/webhook/YOUR/WEBHOOK/URL"
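These manifests use `stringData`, so values can be written in plaintext and Kubernetes encodes them on admission. If you write the `data` field directly instead, every value must be base64-encoded yourself, with no stray newline. A minimal sketch:

```shell
#!/bin/sh
# stringData accepts plaintext; the equivalent "data" field needs base64.
plain="your_secure_postgres_password_here"

# printf avoids the trailing newline that a bare echo would append;
# an encoded newline silently corrupts the password.
encoded=$(printf '%s' "$plain" | base64)
decoded=$(printf '%s' "$encoded" | base64 -d)

echo "data-field value: $encoded"
[ "$decoded" = "$plain" ] && echo "round trip ok"
```

The same applies when creating secrets imperatively: `kubectl create secret generic ... --from-literal=...` performs this encoding for you.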

2.8.4 Persistent Storage Configuration

2.8.4.1 StorageClass (k8s/storage/storage-class.yaml)
# TestMaster Automated Testing Platform - StorageClass
# Version: 1.0.0
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: testmaster-fast-ssd
  labels:
    app: testmaster
provisioner: kubernetes.io/aws-ebs  # adjust for your cloud provider
parameters:
  type: gp3
  iopsPerGB: "50"
  fsType: ext4
  encrypted: "true"
allowVolumeExpansion: true
reclaimPolicy: Retain
volumeBindingMode: WaitForFirstConsumer
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: testmaster-standard
  labels:
    app: testmaster
provisioner: kubernetes.io/aws-ebs  # adjust for your cloud provider
parameters:
  type: gp2
  fsType: ext4
  encrypted: "true"
allowVolumeExpansion: true
reclaimPolicy: Retain
volumeBindingMode: WaitForFirstConsumer
2.8.4.2 PersistentVolumeClaim (k8s/storage/pvc.yaml)
# TestMaster Automated Testing Platform - PVCs
# Version: 1.0.0
# PostgreSQL PVC
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: testmaster-postgres-pvc
  namespace: testmaster
  labels:
    app: testmaster
    component: postgres
spec:
  storageClassName: testmaster-fast-ssd
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi
---
# MongoDB PVC
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: testmaster-mongodb-pvc
  namespace: testmaster
  labels:
    app: testmaster
    component: mongodb
spec:
  storageClassName: testmaster-fast-ssd
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 100Gi
---
# Redis PVC
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: testmaster-redis-pvc
  namespace: testmaster
  labels:
    app: testmaster
    component: redis
spec:
  storageClassName: testmaster-fast-ssd
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 20Gi
---
# RabbitMQ PVC
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: testmaster-rabbitmq-pvc
  namespace: testmaster
  labels:
    app: testmaster
    component: rabbitmq
spec:
  storageClassName: testmaster-standard
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 30Gi
---
# MinIO PVC
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: testmaster-minio-pvc
  namespace: testmaster
  labels:
    app: testmaster
    component: minio
spec:
  storageClassName: testmaster-standard
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 200Gi
---
# Prometheus PVC
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: testmaster-prometheus-pvc
  namespace: testmaster
  labels:
    app: testmaster
    component: prometheus
spec:
  storageClassName: testmaster-standard
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi
---
# Grafana PVC
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: testmaster-grafana-pvc
  namespace: testmaster
  labels:
    app: testmaster
    component: grafana
spec:
  storageClassName: testmaster-standard
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
---
# Elasticsearch PVC
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: testmaster-elasticsearch-pvc
  namespace: testmaster
  labels:
    app: testmaster
    component: elasticsearch
spec:
  storageClassName: testmaster-fast-ssd
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 100Gi

2.8.5 Database Deployment Configuration

2.8.5.1 PostgreSQL (k8s/databases/postgres.yaml)
# TestMaster Automated Testing Platform - PostgreSQL deployment
# Version: 1.0.0
apiVersion: v1
kind: Service
metadata:
  name: testmaster-postgres
  namespace: testmaster
  labels:
    app: testmaster
    component: postgres
spec:
  type: ClusterIP
  ports:
    - port: 5432
      targetPort: 5432
      protocol: TCP
      name: postgres
  selector:
    app: testmaster
    component: postgres
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: testmaster-postgres
  namespace: testmaster
  labels:
    app: testmaster
    component: postgres
spec:
  serviceName: testmaster-postgres
  replicas: 1
  selector:
    matchLabels:
      app: testmaster
      component: postgres
  template:
    metadata:
      labels:
        app: testmaster
        component: postgres
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "9187"
    spec:
      containers:
        - name: postgres
          image: postgres:15-alpine
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 5432
              name: postgres
          env:
            - name: POSTGRES_DB
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: DB_NAME
            - name: POSTGRES_USER
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: DB_USER
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-db-secrets
                  key: POSTGRES_PASSWORD
            - name: PGDATA
              value: /var/lib/postgresql/data/pgdata
          volumeMounts:
            - name: postgres-data
              mountPath: /var/lib/postgresql/data
          resources:
            requests:
              cpu: 500m
              memory: 1Gi
            limits:
              cpu: 2000m
              memory: 2Gi
          livenessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                - pg_isready -U testmaster
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                - pg_isready -U testmaster
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
        - name: postgres-exporter
          image: prometheuscommunity/postgres-exporter:latest
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 9187
              name: metrics
          env:
            # POSTGRES_PASSWORD must be declared before DATA_SOURCE_NAME:
            # Kubernetes only expands $(VAR) references to variables
            # defined earlier in the env list.
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-db-secrets
                  key: POSTGRES_PASSWORD
            - name: DATA_SOURCE_NAME
              value: "postgresql://testmaster:$(POSTGRES_PASSWORD)@localhost:5432/testmaster?sslmode=disable"
          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: 200m
              memory: 256Mi
  volumeClaimTemplates:
    - metadata:
        name: postgres-data
      spec:
        accessModes: ["ReadWriteOnce"]
        storageClassName: testmaster-fast-ssd
        resources:
          requests:
            storage: 50Gi
2.8.5.2 MongoDB (k8s/databases/mongodb.yaml)
# TestMaster Automated Testing Platform - MongoDB deployment
# Version: 1.0.0
apiVersion: v1
kind: Service
metadata:
  name: testmaster-mongodb
  namespace: testmaster
  labels:
    app: testmaster
    component: mongodb
spec:
  type: ClusterIP
  ports:
    - port: 27017
      targetPort: 27017
      protocol: TCP
      name: mongodb
  selector:
    app: testmaster
    component: mongodb
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: testmaster-mongodb
  namespace: testmaster
  labels:
    app: testmaster
    component: mongodb
spec:
  serviceName: testmaster-mongodb
  replicas: 1
  selector:
    matchLabels:
      app: testmaster
      component: mongodb
  template:
    metadata:
      labels:
        app: testmaster
        component: mongodb
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "9216"
    spec:
      containers:
        - name: mongodb
          image: mongo:7
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 27017
              name: mongodb
          env:
            - name: MONGO_INITDB_ROOT_USERNAME
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MONGODB_USER
            - name: MONGO_INITDB_ROOT_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-db-secrets
                  key: MONGODB_PASSWORD
            - name: MONGO_INITDB_DATABASE
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MONGODB_DATABASE
          volumeMounts:
            - name: mongodb-data
              mountPath: /data/db
          resources:
            requests:
              cpu: 500m
              memory: 1Gi
            limits:
              cpu: 2000m
              memory: 2Gi
          # mongo:7 ships mongosh; the legacy "mongo" shell was removed
          livenessProbe:
            exec:
              command:
                - mongosh
                - --eval
                - "db.adminCommand('ping')"
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            exec:
              command:
                - mongosh
                - --eval
                - "db.adminCommand('ping')"
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
        - name: mongodb-exporter
          image: percona/mongodb_exporter:0.40
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 9216
              name: metrics
          env:
            # MONGODB_PASSWORD must precede MONGODB_URI so that the
            # $(MONGODB_PASSWORD) reference is expanded by Kubernetes.
            - name: MONGODB_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-db-secrets
                  key: MONGODB_PASSWORD
            - name: MONGODB_URI
              value: "mongodb://testmaster:$(MONGODB_PASSWORD)@localhost:27017"
          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: 200m
              memory: 256Mi
  volumeClaimTemplates:
    - metadata:
        name: mongodb-data
      spec:
        accessModes: ["ReadWriteOnce"]
        storageClassName: testmaster-fast-ssd
        resources:
          requests:
            storage: 100Gi
2.8.5.3 Redis (k8s/databases/redis.yaml)
# TestMaster Automated Testing Platform - Redis deployment
# Version: 1.0.0
apiVersion: v1
kind: Service
metadata:
  name: testmaster-redis
  namespace: testmaster
  labels:
    app: testmaster
    component: redis
spec:
  type: ClusterIP
  ports:
    - port: 6379
      targetPort: 6379
      protocol: TCP
      name: redis
  selector:
    app: testmaster
    component: redis
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: testmaster-redis
  namespace: testmaster
  labels:
    app: testmaster
    component: redis
spec:
  serviceName: testmaster-redis
  replicas: 1
  selector:
    matchLabels:
      app: testmaster
      component: redis
  template:
    metadata:
      labels:
        app: testmaster
        component: redis
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "9121"
    spec:
      containers:
        - name: redis
          image: redis:7-alpine
          imagePullPolicy: IfNotPresent
          command:
            - redis-server
            - --requirepass
            - $(REDIS_PASSWORD)
            - --appendonly
            - "yes"
            - --maxmemory
            - "512mb"
            - --maxmemory-policy
            - allkeys-lru
          ports:
            - containerPort: 6379
              name: redis
          env:
            - name: REDIS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-db-secrets
                  key: REDIS_PASSWORD
          volumeMounts:
            - name: redis-data
              mountPath: /data
          resources:
            requests:
              cpu: 250m
              memory: 512Mi
            limits:
              cpu: 1000m
              memory: 1Gi
          livenessProbe:
            exec:
              command:
                - redis-cli
                - ping
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            exec:
              command:
                - redis-cli
                - ping
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
        - name: redis-exporter
          image: oliver006/redis_exporter:latest
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 9121
              name: metrics
          env:
            - name: REDIS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-db-secrets
                  key: REDIS_PASSWORD
            - name: REDIS_ADDR
              value: "localhost:6379"
          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: 200m
              memory: 256Mi
  volumeClaimTemplates:
    - metadata:
        name: redis-data
      spec:
        accessModes: ["ReadWriteOnce"]
        storageClassName: testmaster-fast-ssd
        resources:
          requests:
            storage: 20Gi

2.8.6 Message Queue and Storage Services

2.8.6.1 RabbitMQ (k8s/services/rabbitmq.yaml)
# TestMaster Automated Testing Platform - RabbitMQ deployment
# Version: 1.0.0
apiVersion: v1
kind: Service
metadata:
  name: testmaster-rabbitmq
  namespace: testmaster
  labels:
    app: testmaster
    component: rabbitmq
spec:
  type: ClusterIP
  ports:
    - port: 5672
      targetPort: 5672
      protocol: TCP
      name: amqp
    - port: 15672
      targetPort: 15672
      protocol: TCP
      name: management
  selector:
    app: testmaster
    component: rabbitmq
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: testmaster-rabbitmq
  namespace: testmaster
  labels:
    app: testmaster
    component: rabbitmq
spec:
  serviceName: testmaster-rabbitmq
  replicas: 1
  selector:
    matchLabels:
      app: testmaster
      component: rabbitmq
  template:
    metadata:
      labels:
        app: testmaster
        component: rabbitmq
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "15692"
    spec:
      containers:
        - name: rabbitmq
          image: rabbitmq:3.12-management-alpine
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 5672
              name: amqp
            - containerPort: 15672
              name: management
            - containerPort: 15692
              name: metrics
          env:
            - name: RABBITMQ_DEFAULT_USER
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: RABBITMQ_USER
            - name: RABBITMQ_DEFAULT_PASS
              valueFrom:
                secretKeyRef:
                  name: testmaster-mq-secrets
                  key: RABBITMQ_PASSWORD
            - name: RABBITMQ_DEFAULT_VHOST
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: RABBITMQ_VHOST
          volumeMounts:
            - name: rabbitmq-data
              mountPath: /var/lib/rabbitmq
          resources:
            requests:
              cpu: 500m
              memory: 1Gi
            limits:
              cpu: 2000m
              memory: 2Gi
          livenessProbe:
            exec:
              command:
                - rabbitmq-diagnostics
                - -q
                - ping
            initialDelaySeconds: 60
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 3
          readinessProbe:
            exec:
              command:
                - rabbitmq-diagnostics
                - -q
                - check_running
            initialDelaySeconds: 20
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
  volumeClaimTemplates:
    - metadata:
        name: rabbitmq-data
      spec:
        accessModes: ["ReadWriteOnce"]
        storageClassName: testmaster-standard
        resources:
          requests:
            storage: 30Gi
2.8.6.2 MinIO (k8s/services/minio.yaml)
# TestMaster Automated Testing Platform - MinIO deployment
# Version: 1.0.0
apiVersion: v1
kind: Service
metadata:
  name: testmaster-minio
  namespace: testmaster
  labels:
    app: testmaster
    component: minio
spec:
  type: ClusterIP
  ports:
    - port: 9000
      targetPort: 9000
      protocol: TCP
      name: api
    - port: 9001
      targetPort: 9001
      protocol: TCP
      name: console
  selector:
    app: testmaster
    component: minio
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: testmaster-minio
  namespace: testmaster
  labels:
    app: testmaster
    component: minio
spec:
  serviceName: testmaster-minio
  replicas: 1
  selector:
    matchLabels:
      app: testmaster
      component: minio
  template:
    metadata:
      labels:
        app: testmaster
        component: minio
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "9000"
        prometheus.io/path: "/minio/v2/metrics/cluster"
    spec:
      containers:
        - name: minio
          image: minio/minio:latest
          imagePullPolicy: IfNotPresent
          command:
            - /bin/bash
            - -c
          args:
            - minio server /data --console-address :9001
          ports:
            - containerPort: 9000
              name: api
            - containerPort: 9001
              name: console
          env:
            - name: MINIO_ROOT_USER
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MINIO_ACCESS_KEY
            - name: MINIO_ROOT_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-storage-secrets
                  key: MINIO_SECRET_KEY
          volumeMounts:
            - name: minio-data
              mountPath: /data
          resources:
            requests:
              cpu: 500m
              memory: 1Gi
            limits:
              cpu: 2000m
              memory: 2Gi
          livenessProbe:
            httpGet:
              path: /minio/health/live
              port: 9000
            initialDelaySeconds: 30
            periodSeconds: 20
            timeoutSeconds: 10
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /minio/health/ready
              port: 9000
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
  volumeClaimTemplates:
    - metadata:
        name: minio-data
      spec:
        accessModes: ["ReadWriteOnce"]
        storageClassName: testmaster-standard
        resources:
          requests:
            storage: 200Gi
---
# MinIO initialization Job
apiVersion: batch/v1
kind: Job
metadata:
  name: testmaster-minio-init
  namespace: testmaster
  labels:
    app: testmaster
    component: minio-init
spec:
  template:
    metadata:
      labels:
        app: testmaster
        component: minio-init
    spec:
      restartPolicy: OnFailure
      containers:
        - name: minio-init
          image: minio/mc:latest
          imagePullPolicy: IfNotPresent
          command:
            - /bin/sh
            - -c
            - |
              sleep 10
              mc config host add myminio http://testmaster-minio:9000 $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD
              mc mb myminio/test-reports --ignore-existing
              mc mb myminio/test-recordings --ignore-existing
              mc mb myminio/test-screenshots --ignore-existing
              mc anonymous set download myminio/test-reports
              mc anonymous set download myminio/test-recordings
              mc anonymous set download myminio/test-screenshots
              echo "MinIO buckets initialized successfully"
          env:
            - name: MINIO_ROOT_USER
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MINIO_ACCESS_KEY
            - name: MINIO_ROOT_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-storage-secrets
                  key: MINIO_SECRET_KEY
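The init Job above waits with a fixed `sleep 10` before calling `mc`, which fails if MinIO takes longer to come up. A retry loop is more robust. In this sketch `mc_mock` is a hypothetical stand-in so the code runs without a MinIO server; the real Job would retry the actual `mc config host add` command:

```shell
#!/bin/sh
# Retry a command up to $max times with a 1s pause between attempts.
retry() {
    max=$1
    shift
    tries=0
    until "$@"; do
        tries=$((tries + 1))
        if [ "$tries" -ge "$max" ]; then
            return 1
        fi
        sleep 1
    done
}

# Mock client (hypothetical): fails twice, then succeeds, to simulate
# a MinIO server that is still starting up.
calls_file=$(mktemp)
echo 0 > "$calls_file"
mc_mock() {
    n=$(cat "$calls_file")
    echo $((n + 1)) > "$calls_file"
    [ "$n" -ge 2 ]
}

retry 5 mc_mock && echo "connected after $(cat "$calls_file") attempts"
```

With `restartPolicy: OnFailure` the Job is re-run on failure anyway, but retrying inside the script avoids churning through whole pod restarts for a transient startup delay.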

Continued in the next section...

2.8.7 Selenium Grid Deployment

2.8.7.1 Selenium Hub (k8s/selenium/hub.yaml)
# TestMaster Automated Testing Platform - Selenium Hub deployment
# Version: 1.0.0
apiVersion: v1
kind: Service
metadata:
  name: testmaster-selenium-hub
  namespace: testmaster
  labels:
    app: testmaster
    component: selenium-hub
spec:
  type: ClusterIP
  ports:
    - port: 4444
      targetPort: 4444
      protocol: TCP
      name: selenium
    - port: 4442
      targetPort: 4442
      protocol: TCP
      name: event-bus-publish
    - port: 4443
      targetPort: 4443
      protocol: TCP
      name: event-bus-subscribe
  selector:
    app: testmaster
    component: selenium-hub
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: testmaster-selenium-hub
  namespace: testmaster
  labels:
    app: testmaster
    component: selenium-hub
spec:
  replicas: 1
  selector:
    matchLabels:
      app: testmaster
      component: selenium-hub
  template:
    metadata:
      labels:
        app: testmaster
        component: selenium-hub
    spec:
      containers:
        - name: selenium-hub
          image: selenium/hub:4.15.0
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 4444
              name: selenium
            - containerPort: 4442
              name: event-bus-pub
            - containerPort: 4443
              name: event-bus-sub
          env:
            - name: SE_SESSION_REQUEST_TIMEOUT
              value: "300"
            - name: SE_SESSION_RETRY_INTERVAL
              value: "5"
            - name: SE_HEALTHCHECK_INTERVAL
              value: "10"
          resources:
            requests:
              cpu: 500m
              memory: 512Mi
            limits:
              cpu: 1000m
              memory: 1Gi
          livenessProbe:
            httpGet:
              path: /wd/hub/status
              port: 4444
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /wd/hub/status
              port: 4444
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
2.8.7.2 Chrome Node (k8s/selenium/chrome-node.yaml)
# TestMaster Automated Testing Platform - Chrome Node deployment
# Version: 1.0.0
apiVersion: apps/v1
kind: Deployment
metadata:
  name: testmaster-selenium-chrome
  namespace: testmaster
  labels:
    app: testmaster
    component: selenium-chrome
spec:
  replicas: 3
  selector:
    matchLabels:
      app: testmaster
      component: selenium-chrome
  template:
    metadata:
      labels:
        app: testmaster
        component: selenium-chrome
    spec:
      containers:
        - name: selenium-chrome
          image: selenium/node-chrome:4.15.0
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 5555
              name: node
            - containerPort: 7900
              name: vnc
          env:
            - name: SE_EVENT_BUS_HOST
              value: testmaster-selenium-hub
            - name: SE_EVENT_BUS_PUBLISH_PORT
              value: "4442"
            - name: SE_EVENT_BUS_SUBSCRIBE_PORT
              value: "4443"
            - name: SE_NODE_MAX_SESSIONS
              value: "10"
            - name: SE_NODE_SESSION_TIMEOUT
              value: "300"
            - name: SE_VNC_NO_PASSWORD
              value: "1"
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm
          resources:
            requests:
              cpu: 1000m
              memory: 2Gi
            limits:
              cpu: 2000m
              memory: 4Gi
          livenessProbe:
            httpGet:
              path: /status
              port: 5555
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /status
              port: 5555
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: 2Gi
2.8.7.3 Firefox Node (k8s/selenium/firefox-node.yaml)
# TestMaster Automation Testing Platform - Firefox Node deployment
# Version: 1.0.0
apiVersion: apps/v1
kind: Deployment
metadata:
  name: testmaster-selenium-firefox
  namespace: testmaster
  labels:
    app: testmaster
    component: selenium-firefox
spec:
  replicas: 2
  selector:
    matchLabels:
      app: testmaster
      component: selenium-firefox
  template:
    metadata:
      labels:
        app: testmaster
        component: selenium-firefox
    spec:
      containers:
        - name: selenium-firefox
          image: selenium/node-firefox:4.15.0
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 5555
              name: node
            - containerPort: 7900
              name: vnc
          env:
            - name: SE_EVENT_BUS_HOST
              value: testmaster-selenium-hub
            - name: SE_EVENT_BUS_PUBLISH_PORT
              value: "4442"
            - name: SE_EVENT_BUS_SUBSCRIBE_PORT
              value: "4443"
            - name: SE_NODE_MAX_SESSIONS
              value: "10"
            - name: SE_NODE_SESSION_TIMEOUT
              value: "300"
            - name: SE_VNC_NO_PASSWORD
              value: "1"
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm
          resources:
            requests:
              cpu: 1000m
              memory: 2Gi
            limits:
              cpu: 2000m
              memory: 4Gi
          livenessProbe:
            httpGet:
              path: /status
              port: 5555
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /status
              port: 5555
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: 2Gi
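With the node deployments above, the Grid's total parallel-session capacity is simply replicas × SE_NODE_MAX_SESSIONS summed over the browser pools. A minimal sketch of that arithmetic, using the numbers from these manifests (3 Chrome nodes and 2 Firefox nodes at 10 sessions each); the function name is illustrative, not part of the project:

```shell
# Total Selenium Grid capacity = sum over pools of (replicas * SE_NODE_MAX_SESSIONS).
grid_capacity() {
  local chrome_replicas=3 firefox_replicas=2 max_sessions=10
  echo $(( (chrome_replicas + firefox_replicas) * max_sessions ))
}

grid_capacity   # prints 50
```

If test runs routinely queue at the hub, either raise the node replica counts or SE_NODE_MAX_SESSIONS; note that each extra Chrome session also consumes shared memory from the 2Gi /dev/shm emptyDir.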

2.8.8 Backend Service Deployments

2.8.8.1 Gateway (k8s/backend/gateway.yaml)
# TestMaster Automation Testing Platform - Gateway deployment
# Version: 1.0.0
apiVersion: v1
kind: Service
metadata:
  name: testmaster-gateway
  namespace: testmaster
  labels:
    app: testmaster
    component: gateway
spec:
  type: ClusterIP
  ports:
    - port: 3000
      targetPort: 3000
      protocol: TCP
      name: http
  selector:
    app: testmaster
    component: gateway
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: testmaster-gateway
  namespace: testmaster
  labels:
    app: testmaster
    component: gateway
spec:
  replicas: 3
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  selector:
    matchLabels:
      app: testmaster
      component: gateway
  template:
    metadata:
      labels:
        app: testmaster
        component: gateway
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "3000"
        prometheus.io/path: "/metrics"
    spec:
      containers:
        - name: gateway
          image: testmaster/gateway:1.0.0
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 3000
              name: http
          env:
            - name: NODE_ENV
              value: "production"
            - name: PORT
              value: "3000"
            # Configuration read from the ConfigMap
            - name: DB_HOST
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: DB_HOST
            - name: DB_PORT
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: DB_PORT
            - name: DB_NAME
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: DB_NAME
            - name: DB_USER
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: DB_USER
            # Passwords read from Secrets
            - name: DB_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-db-secrets
                  key: POSTGRES_PASSWORD
            - name: REDIS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-db-secrets
                  key: REDIS_PASSWORD
            - name: RABBITMQ_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-mq-secrets
                  key: RABBITMQ_PASSWORD
            - name: JWT_SECRET
              valueFrom:
                secretKeyRef:
                  name: testmaster-app-secrets
                  key: JWT_SECRET
            - name: MINIO_SECRET_KEY
              valueFrom:
                secretKeyRef:
                  name: testmaster-storage-secrets
                  key: MINIO_SECRET_KEY
            # Redis configuration
            - name: REDIS_HOST
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: REDIS_HOST
            - name: REDIS_PORT
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: REDIS_PORT
            # RabbitMQ configuration
            - name: RABBITMQ_HOST
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: RABBITMQ_HOST
            - name: RABBITMQ_PORT
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: RABBITMQ_PORT
            - name: RABBITMQ_USER
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: RABBITMQ_USER
            - name: RABBITMQ_VHOST
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: RABBITMQ_VHOST
            # MinIO configuration
            - name: MINIO_ENDPOINT
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MINIO_ENDPOINT
            - name: MINIO_PORT
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MINIO_PORT
            - name: MINIO_ACCESS_KEY
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MINIO_ACCESS_KEY
            - name: MINIO_USE_SSL
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MINIO_USE_SSL
            # Downstream service addresses
            - name: AI_GENERATOR_URL
              value: "http://testmaster-ai-generator:8001"
            - name: EXECUTOR_URL
              value: "http://testmaster-executor:8002"
            - name: PERFORMANCE_URL
              value: "http://testmaster-performance:8003"
          resources:
            requests:
              cpu: 500m
              memory: 512Mi
            limits:
              cpu: 1000m
              memory: 1Gi
          livenessProbe:
            httpGet:
              path: /api/health
              port: 3000
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /api/health
              port: 3000
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: testmaster-gateway-hpa
  namespace: testmaster
  labels:
    app: testmaster
    component: gateway
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: testmaster-gateway
  minReplicas: 3
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 50
          periodSeconds: 60
    scaleUp:
      stabilizationWindowSeconds: 0
      policies:
        - type: Percent
          value: 100
          periodSeconds: 30
        - type: Pods
          value: 2
          periodSeconds: 30
      selectPolicy: Max
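The HPA's core sizing rule is desiredReplicas = ceil(currentReplicas × currentMetric / targetMetric), clamped to the min/max bounds; the behavior section then limits how fast the result may be applied. A small sketch of just the formula (ignoring stabilization windows and rate policies; the function name is illustrative):

```shell
# HPA sizing: desired = ceil(current * currentUtilization / target),
# clamped to the bounds from the manifest (3..10 for the gateway).
hpa_desired() {
  local current=$1 util=$2 target=$3 min=$4 max=$5
  local desired=$(( (current * util + target - 1) / target ))   # integer ceiling
  (( desired < min )) && desired=$min
  (( desired > max )) && desired=$max
  echo $desired
}

hpa_desired 3 140 70 3 10   # CPU at 140% of the 70% target -> 6 replicas
hpa_desired 3 20 70 3 10    # light load -> clamped up to minReplicas 3
```

The asymmetric behavior block means the gateway doubles quickly under load (scaleUp stabilization 0) but sheds at most 50% of its pods per minute only after five calm minutes.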
2.8.8.2 AI Generator (k8s/backend/ai-generator.yaml)
# TestMaster Automation Testing Platform - AI Generator deployment
# Version: 1.0.0
apiVersion: v1
kind: Service
metadata:
  name: testmaster-ai-generator
  namespace: testmaster
  labels:
    app: testmaster
    component: ai-generator
spec:
  type: ClusterIP
  ports:
    - port: 8001
      targetPort: 8001
      protocol: TCP
      name: http
  selector:
    app: testmaster
    component: ai-generator
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: testmaster-ai-generator
  namespace: testmaster
  labels:
    app: testmaster
    component: ai-generator
spec:
  replicas: 2
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  selector:
    matchLabels:
      app: testmaster
      component: ai-generator
  template:
    metadata:
      labels:
        app: testmaster
        component: ai-generator
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8001"
        prometheus.io/path: "/metrics"
    spec:
      containers:
        - name: ai-generator
          image: testmaster/ai-generator:1.0.0
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 8001
              name: http
          env:
            - name: ENVIRONMENT
              value: "production"
            - name: PORT
              value: "8001"
            # MongoDB configuration
            - name: MONGODB_HOST
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MONGODB_HOST
            - name: MONGODB_PORT
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MONGODB_PORT
            - name: MONGODB_DATABASE
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MONGODB_DATABASE
            - name: MONGODB_USER
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MONGODB_USER
            - name: MONGODB_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-db-secrets
                  key: MONGODB_PASSWORD
            # MONGODB_URI must come after the variables it references:
            # Kubernetes only expands $(VAR) against env entries defined earlier in the list.
            - name: MONGODB_URI
              value: "mongodb://$(MONGODB_USER):$(MONGODB_PASSWORD)@$(MONGODB_HOST):$(MONGODB_PORT)/$(MONGODB_DATABASE)?authSource=admin"
            # Redis configuration
            - name: REDIS_HOST
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: REDIS_HOST
            - name: REDIS_PORT
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: REDIS_PORT
            - name: REDIS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-db-secrets
                  key: REDIS_PASSWORD
            # RabbitMQ configuration
            - name: RABBITMQ_HOST
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: RABBITMQ_HOST
            - name: RABBITMQ_PORT
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: RABBITMQ_PORT
            - name: RABBITMQ_USER
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: RABBITMQ_USER
            - name: RABBITMQ_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-mq-secrets
                  key: RABBITMQ_PASSWORD
            - name: RABBITMQ_VHOST
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: RABBITMQ_VHOST
            # AI API keys
            - name: OPENAI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: testmaster-ai-secrets
                  key: OPENAI_API_KEY
            - name: OPENAI_MODEL
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: OPENAI_MODEL
            - name: OPENAI_MAX_TOKENS
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: OPENAI_MAX_TOKENS
            - name: OPENAI_TEMPERATURE
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: OPENAI_TEMPERATURE
            - name: DEEPSEEK_API_KEY
              valueFrom:
                secretKeyRef:
                  name: testmaster-ai-secrets
                  key: DEEPSEEK_API_KEY
            - name: DEEPSEEK_MODEL
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: DEEPSEEK_MODEL
            - name: DEEPSEEK_BASE_URL
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: DEEPSEEK_BASE_URL
            - name: ANTHROPIC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: testmaster-ai-secrets
                  key: ANTHROPIC_API_KEY
            - name: ANTHROPIC_MODEL
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: ANTHROPIC_MODEL
            - name: ANTHROPIC_MAX_TOKENS
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: ANTHROPIC_MAX_TOKENS
          resources:
            requests:
              cpu: 1000m
              memory: 1Gi
            limits:
              cpu: 2000m
              memory: 2Gi
          livenessProbe:
            httpGet:
              path: /health
              port: 8001
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /health
              port: 8001
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: testmaster-ai-generator-hpa
  namespace: testmaster
  labels:
    app: testmaster
    component: ai-generator
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: testmaster-ai-generator
  minReplicas: 2
  maxReplicas: 8
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
2.8.8.3 Executor (k8s/backend/executor.yaml)
# TestMaster Automation Testing Platform - Executor deployment
# Version: 1.0.0
apiVersion: v1
kind: Service
metadata:
  name: testmaster-executor
  namespace: testmaster
  labels:
    app: testmaster
    component: executor
spec:
  type: ClusterIP
  ports:
    - port: 8002
      targetPort: 8002
      protocol: TCP
      name: http
  selector:
    app: testmaster
    component: executor
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: testmaster-executor
  namespace: testmaster
  labels:
    app: testmaster
    component: executor
spec:
  replicas: 3
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  selector:
    matchLabels:
      app: testmaster
      component: executor
  template:
    metadata:
      labels:
        app: testmaster
        component: executor
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8002"
        prometheus.io/path: "/metrics"
    spec:
      containers:
        - name: executor
          image: testmaster/executor:1.0.0
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 8002
              name: http
          env:
            - name: ENVIRONMENT
              value: "production"
            - name: PORT
              value: "8002"
            # MongoDB configuration
            - name: MONGODB_HOST
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MONGODB_HOST
            - name: MONGODB_PORT
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MONGODB_PORT
            - name: MONGODB_DATABASE
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MONGODB_DATABASE
            - name: MONGODB_USER
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MONGODB_USER
            - name: MONGODB_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-db-secrets
                  key: MONGODB_PASSWORD
            # MONGODB_URI must come after the variables it references:
            # Kubernetes only expands $(VAR) against env entries defined earlier in the list.
            - name: MONGODB_URI
              value: "mongodb://$(MONGODB_USER):$(MONGODB_PASSWORD)@$(MONGODB_HOST):$(MONGODB_PORT)/$(MONGODB_DATABASE)?authSource=admin"
            # Redis configuration
            - name: REDIS_HOST
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: REDIS_HOST
            - name: REDIS_PORT
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: REDIS_PORT
            - name: REDIS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-db-secrets
                  key: REDIS_PASSWORD
            # RabbitMQ configuration
            - name: RABBITMQ_HOST
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: RABBITMQ_HOST
            - name: RABBITMQ_PORT
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: RABBITMQ_PORT
            - name: RABBITMQ_USER
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: RABBITMQ_USER
            - name: RABBITMQ_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-mq-secrets
                  key: RABBITMQ_PASSWORD
            - name: RABBITMQ_VHOST
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: RABBITMQ_VHOST
            # Selenium configuration
            - name: SELENIUM_HUB_URL
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: SELENIUM_HUB_URL
            - name: SELENIUM_IMPLICIT_WAIT
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: SELENIUM_IMPLICIT_WAIT
            - name: SELENIUM_PAGE_LOAD_TIMEOUT
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: SELENIUM_PAGE_LOAD_TIMEOUT
            - name: SELENIUM_SCRIPT_TIMEOUT
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: SELENIUM_SCRIPT_TIMEOUT
            # MinIO configuration
            - name: MINIO_ENDPOINT
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MINIO_ENDPOINT
            - name: MINIO_PORT
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MINIO_PORT
            - name: MINIO_ACCESS_KEY
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MINIO_ACCESS_KEY
            - name: MINIO_SECRET_KEY
              valueFrom:
                secretKeyRef:
                  name: testmaster-storage-secrets
                  key: MINIO_SECRET_KEY
            - name: MINIO_USE_SSL
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MINIO_USE_SSL
            # Executor configuration
            - name: MAX_PARALLEL_EXECUTIONS
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MAX_PARALLEL_EXECUTIONS
            - name: EXECUTION_TIMEOUT
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: EXECUTION_TIMEOUT
            - name: SCREENSHOT_ON_FAILURE
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: SCREENSHOT_ON_FAILURE
            - name: VIDEO_RECORDING
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: VIDEO_RECORDING
            - name: RETRY_FAILED_TESTS
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: RETRY_FAILED_TESTS
            - name: MAX_RETRY_ATTEMPTS
              valueFrom:
                configMapKeyRef:
                  name: testmaster-app-config
                  key: MAX_RETRY_ATTEMPTS
          resources:
            requests:
              cpu: 1000m
              memory: 1Gi
            limits:
              cpu: 2000m
              memory: 2Gi
          livenessProbe:
            httpGet:
              path: /health
              port: 8002
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /health
              port: 8002
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: testmaster-executor-hpa
  namespace: testmaster
  labels:
    app: testmaster
    component: executor
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: testmaster-executor
  minReplicas: 3
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80

Continued in the next section...

2.8.9 Frontend and Nginx Deployments

2.8.9.1 Frontend (k8s/frontend/frontend.yaml)
# TestMaster Automation Testing Platform - Frontend deployment
# Version: 1.0.0
apiVersion: v1
kind: Service
metadata:
  name: testmaster-frontend
  namespace: testmaster
  labels:
    app: testmaster
    component: frontend
spec:
  type: ClusterIP
  ports:
    - port: 5173
      targetPort: 5173
      protocol: TCP
      name: http
  selector:
    app: testmaster
    component: frontend
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: testmaster-frontend
  namespace: testmaster
  labels:
    app: testmaster
    component: frontend
spec:
  replicas: 2
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  selector:
    matchLabels:
      app: testmaster
      component: frontend
  template:
    metadata:
      labels:
        app: testmaster
        component: frontend
    spec:
      containers:
        - name: frontend
          image: testmaster/frontend:1.0.0
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 5173
              name: http
          env:
            - name: NODE_ENV
              value: "production"
            - name: VITE_API_BASE_URL
              value: "http://testmaster-nginx/api"
            - name: VITE_WS_URL
              value: "ws://testmaster-nginx"
          resources:
            requests:
              cpu: 250m
              memory: 256Mi
            limits:
              cpu: 500m
              memory: 512Mi
          livenessProbe:
            httpGet:
              path: /
              port: 5173
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /
              port: 5173
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
2.8.9.2 Nginx (k8s/nginx/nginx.yaml)
# TestMaster Automation Testing Platform - Nginx deployment
# Version: 1.0.0
apiVersion: v1
kind: Service
metadata:
  name: testmaster-nginx
  namespace: testmaster
  labels:
    app: testmaster
    component: nginx
spec:
  type: LoadBalancer
  ports:
    - port: 80
      targetPort: 80
      protocol: TCP
      name: http
    - port: 443
      targetPort: 443
      protocol: TCP
      name: https
  selector:
    app: testmaster
    component: nginx
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: testmaster-nginx
  namespace: testmaster
  labels:
    app: testmaster
    component: nginx
spec:
  replicas: 2
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  selector:
    matchLabels:
      app: testmaster
      component: nginx
  template:
    metadata:
      labels:
        app: testmaster
        component: nginx
    spec:
      containers:
        - name: nginx
          image: nginx:alpine
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 80
              name: http
            - containerPort: 443
              name: https
          volumeMounts:
            - name: nginx-config
              mountPath: /etc/nginx/nginx.conf
              subPath: nginx.conf
          resources:
            requests:
              cpu: 250m
              memory: 256Mi
            limits:
              cpu: 500m
              memory: 512Mi
          livenessProbe:
            httpGet:
              path: /health
              port: 80
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /health
              port: 80
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
      volumes:
        - name: nginx-config
          configMap:
            name: testmaster-nginx-config

2.8.10 Ingress Configuration (k8s/ingress/ingress.yaml)

# TestMaster Automation Testing Platform - Ingress configuration
# Version: 1.0.0
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: testmaster-ingress
  namespace: testmaster
  labels:
    app: testmaster
  annotations:
    kubernetes.io/ingress.class: nginx
    cert-manager.io/cluster-issuer: letsencrypt-prod
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: "100m"
    nginx.ingress.kubernetes.io/proxy-connect-timeout: "300"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "300"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
    nginx.ingress.kubernetes.io/websocket-services: testmaster-gateway
    nginx.ingress.kubernetes.io/configuration-snippet: |
      more_set_headers "X-Frame-Options: SAMEORIGIN";
      more_set_headers "X-Content-Type-Options: nosniff";
      more_set_headers "X-XSS-Protection: 1; mode=block";
spec:
  tls:
    - hosts:
        - testmaster.example.com
        - api.testmaster.example.com
      secretName: testmaster-tls
  rules:
    # Main domain - frontend application
    - host: testmaster.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: testmaster-nginx
                port:
                  number: 80
    # API subdomain
    - host: api.testmaster.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: testmaster-gateway
                port:
                  number: 3000
---
# Grafana Ingress
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: testmaster-grafana-ingress
  namespace: testmaster
  labels:
    app: testmaster
    component: grafana
  annotations:
    kubernetes.io/ingress.class: nginx
    cert-manager.io/cluster-issuer: letsencrypt-prod
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
spec:
  tls:
    - hosts:
        - grafana.testmaster.example.com
      secretName: testmaster-grafana-tls
  rules:
    - host: grafana.testmaster.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: testmaster-grafana
                port:
                  number: 3000
---
# Kibana Ingress
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: testmaster-kibana-ingress
  namespace: testmaster
  labels:
    app: testmaster
    component: kibana
  annotations:
    kubernetes.io/ingress.class: nginx
    cert-manager.io/cluster-issuer: letsencrypt-prod
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
spec:
  tls:
    - hosts:
        - kibana.testmaster.example.com
      secretName: testmaster-kibana-tls
  rules:
    - host: kibana.testmaster.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: testmaster-kibana
                port:
                  number: 5601

2.8.11 Monitoring Stack Deployment

2.8.11.1 Prometheus (k8s/monitoring/prometheus.yaml)
# TestMaster Automation Testing Platform - Prometheus deployment
# Version: 1.0.0
apiVersion: v1
kind: Service
metadata:
  name: testmaster-prometheus
  namespace: testmaster
  labels:
    app: testmaster
    component: prometheus
spec:
  type: ClusterIP
  ports:
    - port: 9090
      targetPort: 9090
      protocol: TCP
      name: prometheus
  selector:
    app: testmaster
    component: prometheus
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: testmaster-prometheus
  namespace: testmaster
  labels:
    app: testmaster
    component: prometheus
spec:
  serviceName: testmaster-prometheus
  replicas: 1
  selector:
    matchLabels:
      app: testmaster
      component: prometheus
  template:
    metadata:
      labels:
        app: testmaster
        component: prometheus
    spec:
      serviceAccountName: testmaster-prometheus
      containers:
        - name: prometheus
          image: prom/prometheus:latest
          imagePullPolicy: IfNotPresent
          args:
            - '--config.file=/etc/prometheus/prometheus.yml'
            - '--storage.tsdb.path=/prometheus'
            - '--web.console.libraries=/usr/share/prometheus/console_libraries'
            - '--web.console.templates=/usr/share/prometheus/consoles'
            - '--storage.tsdb.retention.time=30d'
            - '--web.enable-lifecycle'
          ports:
            - containerPort: 9090
              name: prometheus
          volumeMounts:
            - name: prometheus-config
              mountPath: /etc/prometheus
            - name: prometheus-data
              mountPath: /prometheus
          resources:
            requests:
              cpu: 500m
              memory: 1Gi
            limits:
              cpu: 2000m
              memory: 2Gi
          livenessProbe:
            httpGet:
              path: /-/healthy
              port: 9090
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /-/ready
              port: 9090
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
      volumes:
        - name: prometheus-config
          configMap:
            name: testmaster-prometheus-config
  volumeClaimTemplates:
    - metadata:
        name: prometheus-data
      spec:
        accessModes: ["ReadWriteOnce"]
        storageClassName: testmaster-standard
        resources:
          requests:
            storage: 50Gi
---
# ServiceAccount for Prometheus
apiVersion: v1
kind: ServiceAccount
metadata:
  name: testmaster-prometheus
  namespace: testmaster
  labels:
    app: testmaster
    component: prometheus
---
# ClusterRole for Prometheus
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: testmaster-prometheus
  labels:
    app: testmaster
    component: prometheus
rules:
  - apiGroups: [""]
    resources:
      - nodes
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
  - apiGroups:
      - extensions
    resources:
      - ingresses
    verbs: ["get", "list", "watch"]
  - nonResourceURLs: ["/metrics"]
    verbs: ["get"]
---
# ClusterRoleBinding for Prometheus
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: testmaster-prometheus
  labels:
    app: testmaster
    component: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: testmaster-prometheus
subjects:
  - kind: ServiceAccount
    name: testmaster-prometheus
    namespace: testmaster
2.8.11.2 Grafana (k8s/monitoring/grafana.yaml)
# TestMaster Automation Testing Platform - Grafana deployment
# Version: 1.0.0
apiVersion: v1
kind: Service
metadata:
  name: testmaster-grafana
  namespace: testmaster
  labels:
    app: testmaster
    component: grafana
spec:
  type: ClusterIP
  ports:
    - port: 3000
      targetPort: 3000
      protocol: TCP
      name: grafana
  selector:
    app: testmaster
    component: grafana
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: testmaster-grafana
  namespace: testmaster
  labels:
    app: testmaster
    component: grafana
spec:
  serviceName: testmaster-grafana
  replicas: 1
  selector:
    matchLabels:
      app: testmaster
      component: grafana
  template:
    metadata:
      labels:
        app: testmaster
        component: grafana
    spec:
      containers:
        - name: grafana
          image: grafana/grafana:latest
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 3000
              name: grafana
          env:
            - name: GF_SECURITY_ADMIN_USER
              value: admin
            - name: GF_SECURITY_ADMIN_PASSWORD
              value: admin
            - name: GF_INSTALL_PLUGINS
              value: grafana-clock-panel,grafana-simple-json-datasource
          volumeMounts:
            - name: grafana-data
              mountPath: /var/lib/grafana
          resources:
            requests:
              cpu: 250m
              memory: 512Mi
            limits:
              cpu: 1000m
              memory: 1Gi
          livenessProbe:
            httpGet:
              path: /api/health
              port: 3000
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /api/health
              port: 3000
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
  volumeClaimTemplates:
    - metadata:
        name: grafana-data
      spec:
        accessModes: ["ReadWriteOnce"]
        storageClassName: testmaster-standard
        resources:
          requests:
            storage: 10Gi

2.8.12 Deployment Scripts

2.8.12.1 One-Click Deployment Script (k8s/deploy.sh)
#!/bin/bash
# TestMaster Automation Testing Platform - Kubernetes one-click deployment script
# Version: 1.0.0

set -e

# Colored output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Print a colored message
print_message() {
    local color=$1
    local message=$2
    echo -e "${color}${message}${NC}"
}

# Print a section header
print_header() {
    echo ""
    echo "============================================================================"
    echo "  $1"
    echo "============================================================================"
    echo ""
}

# Check that kubectl is installed
check_kubectl() {
    if ! command -v kubectl &> /dev/null; then
        print_message $RED "❌ kubectl is not installed; please install kubectl first"
        exit 1
    fi
    print_message $GREEN "✅ kubectl is installed"
}

# Check the cluster connection
check_cluster() {
    if ! kubectl cluster-info &> /dev/null; then
        print_message $RED "❌ Cannot connect to the Kubernetes cluster"
        exit 1
    fi
    print_message $GREEN "✅ Connected to the Kubernetes cluster"
}

# Create the namespace
create_namespace() {
    print_message $YELLOW "📦 Creating namespace..."
    kubectl apply -f namespace.yaml
    print_message $GREEN "✅ Namespace created"
}

# Create ConfigMaps
create_configmaps() {
    print_message $YELLOW "⚙️ Creating ConfigMaps..."
    kubectl apply -f configmaps/
    print_message $GREEN "✅ ConfigMaps created"
}

# Create Secrets
create_secrets() {
    print_message $YELLOW "🔐 Creating Secrets..."
    # Make sure secrets.yaml exists
    if [ ! -f "secrets/secrets.yaml" ]; then
        print_message $RED "❌ secrets/secrets.yaml not found"
        print_message $YELLOW "Please configure secrets/secrets.yaml first"
        exit 1
    fi
    kubectl apply -f secrets/
    print_message $GREEN "✅ Secrets created"
}

# Create storage
create_storage() {
    print_message $YELLOW "💾 Creating storage..."
    kubectl apply -f storage/
    print_message $GREEN "✅ Storage created"
}

# Deploy the databases
deploy_databases() {
    print_message $YELLOW "🗄️ Deploying databases..."
    kubectl apply -f databases/
    # Wait for the databases to become ready
    print_message $YELLOW "⏳ Waiting for databases to become ready..."
    kubectl wait --for=condition=ready pod -l component=postgres -n testmaster --timeout=300s
    kubectl wait --for=condition=ready pod -l component=mongodb -n testmaster --timeout=300s
    kubectl wait --for=condition=ready pod -l component=redis -n testmaster --timeout=300s
    print_message $GREEN "✅ Databases deployed"
}

# Deploy the supporting services
deploy_services() {
    print_message $YELLOW "🔧 Deploying services..."
    kubectl apply -f services/
    # Wait for the services to become ready
    print_message $YELLOW "⏳ Waiting for services to become ready..."
    kubectl wait --for=condition=ready pod -l component=rabbitmq -n testmaster --timeout=300s
    kubectl wait --for=condition=ready pod -l component=minio -n testmaster --timeout=300s
    print_message $GREEN "✅ Services deployed"
}

# Deploy Selenium Grid
deploy_selenium() {
    print_message $YELLOW "🌐 Deploying Selenium Grid..."
    kubectl apply -f selenium/
    # Wait for the Selenium Hub to become ready
    print_message $YELLOW "⏳ Waiting for the Selenium Hub to become ready..."
    kubectl wait --for=condition=ready pod -l component=selenium-hub -n testmaster --timeout=300s
    print_message $GREEN "✅ Selenium Grid deployed"
}

# Deploy the backend services
deploy_backend() {
    print_message $YELLOW "🚀 Deploying backend services..."
    kubectl apply -f backend/
    # Wait for the backend services to become ready
    print_message $YELLOW "⏳ Waiting for backend services to become ready..."
    kubectl wait --for=condition=ready pod -l component=gateway -n testmaster --timeout=300s
    kubectl wait --for=condition=ready pod -l component=ai-generator -n testmaster --timeout=300s
    kubectl wait --for=condition=ready pod -l component=executor -n testmaster --timeout=300s
    print_message $GREEN "✅ Backend services deployed"
}

# Deploy the frontend
deploy_frontend() {
    print_message $YELLOW "🎨 Deploying frontend..."
    kubectl apply -f frontend/
    # Wait for the frontend to become ready
    print_message $YELLOW "⏳ Waiting for the frontend to become ready..."
    kubectl wait --for=condition=ready pod -l component=frontend -n testmaster --timeout=300s
    print_message $GREEN "✅ Frontend deployed"
}

# Deploy Nginx
deploy_nginx() {
    print_message $YELLOW "🔀 Deploying Nginx..."
    kubectl apply -f nginx/
    # Wait for Nginx to become ready
    print_message $YELLOW "⏳ Waiting for Nginx to become ready..."
    kubectl wait --for=condition=ready pod -l component=nginx -n testmaster --timeout=300s
    print_message $GREEN "✅ Nginx deployed"
}

# Deploy Ingress
deploy_ingress() {
    print_message $YELLOW "🌍 Deploying Ingress..."
    kubectl apply -f ingress/
    print_message $GREEN "✅ Ingress deployed"
}

# Deploy the monitoring stack
deploy_monitoring() {
    print_message $YELLOW "📊 Deploying the monitoring stack..."
    kubectl apply -f monitoring/
    # Wait for the monitoring services to become ready
    print_message $YELLOW "⏳ Waiting for monitoring services to become ready..."
    kubectl wait --for=condition=ready pod -l component=prometheus -n testmaster --timeout=300s
    kubectl wait --for=condition=ready pod -l component=grafana -n testmaster --timeout=300s
    print_message $GREEN "✅ Monitoring stack deployed"
}

# Show the deployment status
show_status() {
    print_header "Deployment status"
    kubectl get all -n testmaster
    echo ""
    print_header "Service endpoints"
    # Get the LoadBalancer IP
    NGINX_IP=$(kubectl get svc testmaster-nginx -n testmaster -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
    if [ -z "$NGINX_IP" ]; then
        NGINX_IP=$(kubectl get svc testmaster-nginx -n testmaster -o jsonpath='{.status.loadBalancer.ingress[0].hostname}')
    fi
    if [ -n "$NGINX_IP" ]; then
        echo "🌐 Application URL: http://$NGINX_IP"
        echo "📊 Grafana: http://grafana.testmaster.example.com"
        echo "📈 Kibana: http://kibana.testmaster.example.com"
    else
        print_message $YELLOW "⚠️ LoadBalancer IP not assigned yet; please check again later"
    fi
}

# Main entry point
main() {
    print_header "TestMaster Automation Testing Platform - Kubernetes deployment"

    # Parse arguments
    local command=${1:-deploy}

    case $command in
        deploy)
            check_kubectl
            check_cluster
            create_namespace
            create_configmaps
            create_secrets
            create_storage
            deploy_databases
            deploy_services
            deploy_selenium
            deploy_backend
            deploy_frontend
            deploy_nginx
            deploy_ingress
            deploy_monitoring
            show_status
            print_header "Deployment complete"
            print_message $GREEN "✅ TestMaster has been deployed to the Kubernetes cluster"
            ;;
        delete)
            print_message $YELLOW "🗑️ Deleting all resources..."
            kubectl delete namespace testmaster
            print_message $GREEN "✅ All resources deleted"
            ;;
        status)
            show_status
            ;;
        logs)
            local component=${2:-gateway}
            print_message $YELLOW "📋 Tailing $component logs..."
            kubectl logs -f -l component=$component -n testmaster
            ;;
        restart)
            local component=${2:-gateway}
            print_message $YELLOW "🔄 Restarting $component..."
            kubectl rollout restart deployment/testmaster-$component -n testmaster
            print_message $GREEN "✅ $component restarted"
            ;;
        scale)
            local component=${2:-gateway}
            local replicas=${3:-3}
            print_message $YELLOW "📈 Scaling $component to $replicas replicas..."
            kubectl scale deployment/testmaster-$component --replicas=$replicas -n testmaster
            print_message $GREEN "✅ $component scaled"
            ;;
        *)
            echo "Usage: $0 {deploy|delete|status|logs|restart|scale} [component] [replicas]"
            echo ""
            echo "Commands:"
            echo "  deploy  - deploy all resources"
            echo "  delete  - delete all resources"
            echo "  status  - show deployment status"
            echo "  logs    - tail component logs"
            echo "  restart - restart a component"
            echo "  scale   - scale a component's replica count"
            exit 1
            ;;
    esac
}

# Run the main function
main "$@"
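The script's subcommand dispatch can be exercised without a cluster by echoing the kubectl command each branch would run. A dry-run sketch of that dispatch logic (the `dispatch` helper is illustrative and not part of deploy.sh):

```shell
# Dry-run sketch of deploy.sh's argument dispatch: print the kubectl command
# a subcommand would execute instead of running it.
dispatch() {
  local command=${1:-deploy} component=${2:-gateway} replicas=${3:-3}
  case $command in
    logs)    echo "kubectl logs -f -l component=$component -n testmaster" ;;
    restart) echo "kubectl rollout restart deployment/testmaster-$component -n testmaster" ;;
    scale)   echo "kubectl scale deployment/testmaster-$component --replicas=$replicas -n testmaster" ;;
    *)       echo "unknown command: $command" ;;
  esac
}

dispatch scale executor 5
# -> kubectl scale deployment/testmaster-executor --replicas=5 -n testmaster
```

Defaulting `component` to gateway and `replicas` to 3 mirrors the real script, so `./deploy.sh restart` with no extra arguments restarts the gateway.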
2.8.12.2 Health Check Script (k8s/health-check.sh)
```shell
#!/bin/bash
# TestMaster Automated Testing Platform - Kubernetes health check script
# Version: 1.0.0

set -e

# Colored output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Check Pod health
check_pods() {
    echo "============================================================================"
    echo " Checking Pod health"
    echo "============================================================================"
    echo ""

    local all_healthy=true

    # Fetch all Pods with their phase and Ready condition
    pods=$(kubectl get pods -n testmaster -o json | jq -r '.items[] | "\(.metadata.name) \(.status.phase) \(.status.conditions[] | select(.type=="Ready") | .status)"')

    while IFS= read -r line; do
        pod_name=$(echo $line | awk '{print $1}')
        phase=$(echo $line | awk '{print $2}')
        ready=$(echo $line | awk '{print $3}')

        if [ "$phase" = "Running" ] && [ "$ready" = "True" ]; then
            echo -e "${GREEN}✅ $pod_name - healthy${NC}"
        else
            echo -e "${RED}❌ $pod_name - unhealthy (Phase: $phase, Ready: $ready)${NC}"
            all_healthy=false
        fi
    done <<< "$pods"

    echo ""
    if [ "$all_healthy" = true ]; then
        echo -e "${GREEN}✅ All Pods healthy${NC}"
        return 0
    else
        echo -e "${RED}❌ Some Pods unhealthy${NC}"
        return 1
    fi
}

# Check service endpoints
check_services() {
    echo "============================================================================"
    echo " Checking service endpoints"
    echo "============================================================================"
    echo ""

    services=(
        "testmaster-gateway:3000:/api/health"
        "testmaster-ai-generator:8001:/health"
        "testmaster-executor:8002:/health"
        "testmaster-performance:8003:/health"
    )

    local all_healthy=true

    for service in "${services[@]}"; do
        IFS=':' read -r name port path <<< "$service"

        # Port-forward to the service
        kubectl port-forward -n testmaster svc/$name $port:$port &
        PF_PID=$!
        sleep 2

        # Probe the health endpoint
        if curl -f -s http://localhost:$port$path > /dev/null 2>&1; then
            echo -e "${GREEN}✅ $name - healthy${NC}"
        else
            echo -e "${RED}❌ $name - unhealthy${NC}"
            all_healthy=false
        fi

        # Stop the port-forward
        kill $PF_PID 2>/dev/null || true
    done

    echo ""
    if [ "$all_healthy" = true ]; then
        echo -e "${GREEN}✅ All services healthy${NC}"
        return 0
    else
        echo -e "${RED}❌ Some services unhealthy${NC}"
        return 1
    fi
}

# Check resource usage
check_resources() {
    echo "============================================================================"
    echo " Checking resource usage"
    echo "============================================================================"
    echo ""
    kubectl top pods -n testmaster
    echo ""
}

# Main
main() {
    echo ""
    echo "============================================================================"
    echo " TestMaster Kubernetes Health Check"
    echo "============================================================================"
    echo ""

    check_pods
    check_services
    check_resources

    echo "============================================================================"
    echo " Health check complete"
    echo "============================================================================"
}

main "$@"
```
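The fixed `sleep 2` after `kubectl port-forward` can race on slow clusters: if the tunnel is not up yet, a healthy service is reported as unhealthy. A retry loop that polls until the endpoint answers or a timeout expires is more robust; `wait_for_endpoint` below is a sketch, not part of the original script:

```shell
# Poll a URL with curl until it responds or $2 seconds have elapsed.
# Returns 0 on success, 1 on timeout. Defaults to a 30-second budget.
wait_for_endpoint() {
    local url=$1 timeout=${2:-30} waited=0
    while ! curl -f -s "$url" > /dev/null 2>&1; do
        waited=$((waited + 1))
        [ "$waited" -ge "$timeout" ] && return 1
        sleep 1
    done
    return 0
}
```

In `check_services` it would replace the `sleep 2` / one-shot `curl` pair with `wait_for_endpoint "http://localhost:$port$path" 10`.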

TestMaster Automated Testing Platform - Part 9: Complete Monitoring System Configuration

2.9 Complete Monitoring System Configuration

2.9.1 Prometheus Rules Configuration

2.9.1.1 Alert Rules (k8s/monitoring/prometheus-rules.yaml)
# TestMaster 自动化测试平台 - Prometheus 告警规则 # 版本: 1.0.0 apiVersion: v1 kind: ConfigMap metadata: name: testmaster-prometheus-rules namespace: testmaster labels: app: testmaster component: prometheus data: alert.rules: | groups: # ============================================================================ # 系统级告警规则 # ============================================================================ - name: system_alerts interval: 30s rules: # Pod 重启告警 - alert: PodRestartingTooOften expr: rate(kube_pod_container_status_restarts_total{namespace="testmaster"}[15m]) > 0 for: 5m labels: severity: warning category: system annotations: summary: "Pod {{ $labels.pod }} 频繁重启" description: "Pod {{ $labels.pod }} 在过去15分钟内重启了 {{ $value }} 次" # Pod 不健康告警 - alert: PodNotReady expr: kube_pod_status_phase{namespace="testmaster", phase!="Running"} > 0 for: 5m labels: severity: critical category: system annotations: summary: "Pod {{ $labels.pod }} 不健康" description: "Pod {{ $labels.pod }} 状态为 {{ $labels.phase }},已持续5分钟" # 节点资源不足告警 - alert: NodeMemoryPressure expr: kube_node_status_condition{condition="MemoryPressure", status="true"} > 0 for: 5m labels: severity: warning category: system annotations: summary: "节点 {{ $labels.node }} 内存压力" description: "节点 {{ $labels.node }} 内存不足" # 节点磁盘压力告警 - alert: NodeDiskPressure expr: kube_node_status_condition{condition="DiskPressure", status="true"} > 0 for: 5m labels: severity: warning category: system annotations: summary: "节点 {{ $labels.node }} 磁盘压力" description: "节点 {{ $labels.node }} 磁盘空间不足" # ============================================================================ # 应用级告警规则 # ============================================================================ - name: application_alerts interval: 30s rules: # Gateway 服务不可用 - alert: GatewayDown expr: up{job="testmaster-gateway"} == 0 for: 2m labels: severity: critical category: application service: gateway annotations: summary: "Gateway 服务不可用" description: "Gateway 服务已下线超过2分钟" # AI Generator 服务不可用 - alert: 
AIGeneratorDown expr: up{job="testmaster-ai-generator"} == 0 for: 2m labels: severity: critical category: application service: ai-generator annotations: summary: "AI Generator 服务不可用" description: "AI Generator 服务已下线超过2分钟" # Executor 服务不可用 - alert: ExecutorDown expr: up{job="testmaster-executor"} == 0 for: 2m labels: severity: critical category: application service: executor annotations: summary: "Executor 服务不可用" description: "Executor 服务已下线超过2分钟" # 高错误率告警 - alert: HighErrorRate expr: | sum(rate(http_requests_total{status=~"5.."}[5m])) by (service) / sum(rate(http_requests_total[5m])) by (service) > 0.05 for: 5m labels: severity: warning category: application annotations: summary: "{{ $labels.service }} 错误率过高" description: "{{ $labels.service }} 的5xx错误率为 {{ $value | humanizePercentage }}" # 高响应时间告警 - alert: HighResponseTime expr: | histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le, service) ) > 2 for: 5m labels: severity: warning category: application annotations: summary: "{{ $labels.service }} 响应时间过长" description: "{{ $labels.service }} 的P95响应时间为 {{ $value }}s" # ============================================================================ # 资源使用告警规则 # ============================================================================ - name: resource_alerts interval: 30s rules: # CPU 使用率过高 - alert: HighCPUUsage expr: | sum(rate(container_cpu_usage_seconds_total{namespace="testmaster"}[5m])) by (pod) / sum(container_spec_cpu_quota{namespace="testmaster"} / container_spec_cpu_period{namespace="testmaster"}) by (pod) > 0.8 for: 5m labels: severity: warning category: resource annotations: summary: "Pod {{ $labels.pod }} CPU使用率过高" description: "Pod {{ $labels.pod }} CPU使用率为 {{ $value | humanizePercentage }}" # 内存使用率过高 - alert: HighMemoryUsage expr: | sum(container_memory_working_set_bytes{namespace="testmaster"}) by (pod) / sum(container_spec_memory_limit_bytes{namespace="testmaster"}) by (pod) > 0.8 for: 5m labels: severity: warning 
category: resource annotations: summary: "Pod {{ $labels.pod }} 内存使用率过高" description: "Pod {{ $labels.pod }} 内存使用率为 {{ $value | humanizePercentage }}" # 磁盘使用率过高 - alert: HighDiskUsage expr: | (node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"}) / node_filesystem_size_bytes{mountpoint="/"} > 0.85 for: 5m labels: severity: warning category: resource annotations: summary: "节点 {{ $labels.instance }} 磁盘使用率过高" description: "节点 {{ $labels.instance }} 磁盘使用率为 {{ $value | humanizePercentage }}" # ============================================================================ # 数据库告警规则 # ============================================================================ - name: database_alerts interval: 30s rules: # PostgreSQL 连接数过高 - alert: PostgreSQLHighConnections expr: | sum(pg_stat_activity_count) by (instance) / pg_settings_max_connections > 0.8 for: 5m labels: severity: warning category: database database: postgresql annotations: summary: "PostgreSQL 连接数过高" description: "PostgreSQL 连接数使用率为 {{ $value | humanizePercentage }}" # PostgreSQL 慢查询 - alert: PostgreSQLSlowQueries expr: rate(pg_stat_statements_mean_exec_time[5m]) > 1000 for: 5m labels: severity: warning category: database database: postgresql annotations: summary: "PostgreSQL 存在慢查询" description: "平均查询时间为 {{ $value }}ms" # MongoDB 连接数过高 - alert: MongoDBHighConnections expr: | mongodb_connections{state="current"} / mongodb_connections{state="available"} > 0.8 for: 5m labels: severity: warning category: database database: mongodb annotations: summary: "MongoDB 连接数过高" description: "MongoDB 连接数使用率为 {{ $value | humanizePercentage }}" # Redis 内存使用过高 - alert: RedisHighMemory expr: | redis_memory_used_bytes / redis_memory_max_bytes > 0.8 for: 5m labels: severity: warning category: database database: redis annotations: summary: "Redis 内存使用过高" description: "Redis 内存使用率为 {{ $value | humanizePercentage }}" # Redis 连接数过高 - alert: RedisHighConnections expr: redis_connected_clients > 1000 for: 5m 
labels: severity: warning category: database database: redis annotations: summary: "Redis 连接数过高" description: "Redis 当前连接数为 {{ $value }}" # ============================================================================ # 消息队列告警规则 # ============================================================================ - name: messagequeue_alerts interval: 30s rules: # RabbitMQ 队列消息堆积 - alert: RabbitMQQueueBacklog expr: rabbitmq_queue_messages > 10000 for: 10m labels: severity: warning category: messagequeue annotations: summary: "RabbitMQ 队列 {{ $labels.queue }} 消息堆积" description: "队列 {{ $labels.queue }} 有 {{ $value }} 条未消费消息" # RabbitMQ 消费者数量为0 - alert: RabbitMQNoConsumers expr: rabbitmq_queue_consumers == 0 for: 5m labels: severity: critical category: messagequeue annotations: summary: "RabbitMQ 队列 {{ $labels.queue }} 无消费者" description: "队列 {{ $labels.queue }} 没有活跃的消费者" # RabbitMQ 连接数过高 - alert: RabbitMQHighConnections expr: rabbitmq_connections > 1000 for: 5m labels: severity: warning category: messagequeue annotations: summary: "RabbitMQ 连接数过高" description: "RabbitMQ 当前连接数为 {{ $value }}" # ============================================================================ # Selenium Grid 告警规则 # ============================================================================ - name: selenium_alerts interval: 30s rules: # Selenium Hub 不可用 - alert: SeleniumHubDown expr: up{job="selenium-hub"} == 0 for: 2m labels: severity: critical category: selenium annotations: summary: "Selenium Hub 不可用" description: "Selenium Hub 已下线超过2分钟" # Selenium Node 不足 - alert: SeleniumNodeShortage expr: selenium_grid_node_count < 3 for: 5m labels: severity: warning category: selenium annotations: summary: "Selenium Node 数量不足" description: "当前只有 {{ $value }} 个 Selenium Node 可用" # Selenium 会话队列过长 - alert: SeleniumSessionQueueTooLong expr: selenium_grid_session_queue_size > 10 for: 5m labels: severity: warning category: selenium annotations: summary: "Selenium 会话队列过长" description: "当前有 {{ $value }} 个会话在等待" # 
============================================================================ # 业务指标告警规则 # ============================================================================ - name: business_alerts interval: 30s rules: # 测试执行失败率过高 - alert: HighTestFailureRate expr: | sum(rate(test_executions_total{status="failed"}[10m])) / sum(rate(test_executions_total[10m])) > 0.3 for: 10m labels: severity: warning category: business annotations: summary: "测试失败率过高" description: "过去10分钟测试失败率为 {{ $value | humanizePercentage }}" # AI 生成失败率过高 - alert: HighAIGenerationFailureRate expr: | sum(rate(ai_generations_total{status="failed"}[10m])) / sum(rate(ai_generations_total[10m])) > 0.2 for: 10m labels: severity: warning category: business annotations: summary: "AI生成失败率过高" description: "过去10分钟AI生成失败率为 {{ $value | humanizePercentage }}" # 测试执行时间过长 - alert: LongTestExecutionTime expr: | histogram_quantile(0.95, sum(rate(test_execution_duration_seconds_bucket[10m])) by (le) ) > 600 for: 10m labels: severity: warning category: business annotations: summary: "测试执行时间过长" description: "P95测试执行时间为 {{ $value }}s" # 并发测试数量过高 - alert: HighConcurrentTests expr: test_executions_running > 50 for: 5m labels: severity: warning category: business annotations: summary: "并发测试数量过高" description: "当前有 {{ $value }} 个测试正在执行" # ============================================================================ # 存储告警规则 # ============================================================================ - name: storage_alerts interval: 30s rules: # MinIO 存储空间不足 - alert: MinIOLowStorage expr: | (minio_disk_storage_total_bytes - minio_disk_storage_free_bytes) / minio_disk_storage_total_bytes > 0.85 for: 5m labels: severity: warning category: storage annotations: summary: "MinIO 存储空间不足" description: "MinIO 存储使用率为 {{ $value | humanizePercentage }}" # PVC 存储空间不足 - alert: PVCLowStorage expr: | (kubelet_volume_stats_capacity_bytes - kubelet_volume_stats_available_bytes) / kubelet_volume_stats_capacity_bytes > 0.85 for: 5m labels: severity: 
warning category: storage annotations: summary: "PVC {{ $labels.persistentvolumeclaim }} 存储空间不足" description: "PVC 存储使用率为 {{ $value | humanizePercentage }}" --- # 应用到 Prometheus apiVersion: v1 kind: ConfigMap metadata: name: testmaster-prometheus-config-with-rules namespace: testmaster labels: app: testmaster component: prometheus data: prometheus.yml: | global: scrape_interval: 15s evaluation_interval: 15s external_labels: cluster: 'testmaster-k8s' environment: 'production' # 加载告警规则 rule_files: - '/etc/prometheus/rules/*.rules' # Alertmanager 配置 alerting: alertmanagers: - static_configs: - targets: - testmaster-alertmanager:9093 scrape_configs: - job_name: 'prometheus' static_configs: - targets: ['localhost:9090'] - job_name: 'kubernetes-apiservers' kubernetes_sd_configs: - role: endpoints scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token relabel_configs: - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] action: keep regex: default;kubernetes;https - job_name: 'kubernetes-nodes' kubernetes_sd_configs: - role: node scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token relabel_configs: - action: labelmap regex: __meta_kubernetes_node_label_(.+) - job_name: 'kubernetes-pods' kubernetes_sd_configs: - role: pod relabel_configs: - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] action: keep regex: true - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] action: replace target_label: __metrics_path__ regex: (.+) - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] action: replace regex: ([^:]+)(?::\d+)?;(\d+) replacement: $1:$2 target_label: __address__ - action: labelmap regex: __meta_kubernetes_pod_label_(.+) - 
source_labels: [__meta_kubernetes_namespace] action: replace target_label: kubernetes_namespace - source_labels: [__meta_kubernetes_pod_name] action: replace target_label: kubernetes_pod_name - job_name: 'testmaster-gateway' static_configs: - targets: ['testmaster-gateway:3000'] labels: service: 'gateway' - job_name: 'testmaster-ai-generator' static_configs: - targets: ['testmaster-ai-generator:8001'] labels: service: 'ai-generator' - job_name: 'testmaster-executor' static_configs: - targets: ['testmaster-executor:8002'] labels: service: 'executor' - job_name: 'testmaster-performance' static_configs: - targets: ['testmaster-performance:8003'] labels: service: 'performance' - job_name: 'postgres-exporter' static_configs: - targets: ['testmaster-postgres:9187'] labels: database: 'postgresql' - job_name: 'mongodb-exporter' static_configs: - targets: ['testmaster-mongodb:9216'] labels: database: 'mongodb' - job_name: 'redis-exporter' static_configs: - targets: ['testmaster-redis:9121'] labels: database: 'redis'
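The `HighErrorRate` rule above fires when the 5xx request rate exceeds 5% of the total request rate for a service. The same ratio check can be sketched outside PromQL, which is handy for unit-testing the threshold logic; this helper is illustrative only, with rates given in requests per second:

```shell
# Does the 5xx rate exceed 5% of the total request rate?
# $1 = 5xx requests/s, $2 = total requests/s.
# Exits 0 if the alert condition would be true, 1 otherwise.
error_rate_exceeded() {
    awk -v e="$1" -v t="$2" 'BEGIN { exit !(t > 0 && e / t > 0.05) }'
}
```

The `t > 0` guard mirrors a subtlety of the PromQL version: when total traffic is zero, the division produces no sample and the alert stays silent rather than firing.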

2.9.2 Alertmanager Configuration

2.9.2.1 Alertmanager Deployment (k8s/monitoring/alertmanager.yaml)
# TestMaster 自动化测试平台 - Alertmanager 部署 # 版本: 1.0.0 apiVersion: v1 kind: ConfigMap metadata: name: testmaster-alertmanager-config namespace: testmaster labels: app: testmaster component: alertmanager data: alertmanager.yml: | global: resolve_timeout: 5m smtp_smarthost: 'smtp.gmail.com:587' smtp_from: 'testmaster-alerts@example.com' smtp_auth_username: 'testmaster-alerts@example.com' smtp_auth_password: 'your-email-password' smtp_require_tls: true slack_api_url: 'https://hooks.slack.com/services/YOUR/WEBHOOK/URL' # 路由配置 route: group_by: ['alertname', 'cluster', 'service'] group_wait: 10s group_interval: 10s repeat_interval: 12h receiver: 'default' routes: # 系统级告警 - 发送到运维团队 - match: category: system receiver: 'ops-team' continue: true # 应用级告警 - 发送到开发团队 - match: category: application receiver: 'dev-team' continue: true # 数据库告警 - 发送到DBA团队 - match: category: database receiver: 'dba-team' continue: true # 业务告警 - 发送到产品团队 - match: category: business receiver: 'product-team' continue: true # 严重告警 - 发送到所有人 - match: severity: critical receiver: 'critical-alerts' continue: true # 抑制规则 inhibit_rules: # 如果节点不可达,抑制该节点上的所有Pod告警 - source_match: alertname: 'NodeDown' target_match: category: 'system' equal: ['node'] # 如果服务完全不可用,抑制高错误率告警 - source_match: alertname: 'ServiceDown' target_match: alertname: 'HighErrorRate' equal: ['service'] # 接收器配置 receivers: # 默认接收器 - name: 'default' email_configs: - to: 'testmaster-team@example.com' headers: Subject: '[TestMaster] {{ .GroupLabels.alertname }}' webhook_configs: - url: 'http://testmaster-gateway:3000/api/webhooks/alerts' send_resolved: true # 运维团队 - name: 'ops-team' email_configs: - to: 'ops-team@example.com' headers: Subject: '[TestMaster OPS] {{ .GroupLabels.alertname }}' slack_configs: - channel: '#ops-alerts' title: '{{ .GroupLabels.alertname }}' text: '{{ range .Alerts }}{{ .Annotations.description }}{{ end }}' send_resolved: true webhook_configs: - url: 'https://outlook.office.com/webhook/YOUR/TEAMS/WEBHOOK' send_resolved: true # 
开发团队 - name: 'dev-team' email_configs: - to: 'dev-team@example.com' headers: Subject: '[TestMaster DEV] {{ .GroupLabels.alertname }}' slack_configs: - channel: '#dev-alerts' title: '{{ .GroupLabels.alertname }}' text: '{{ range .Alerts }}{{ .Annotations.description }}{{ end }}' send_resolved: true # DBA团队 - name: 'dba-team' email_configs: - to: 'dba-team@example.com' headers: Subject: '[TestMaster DBA] {{ .GroupLabels.alertname }}' slack_configs: - channel: '#dba-alerts' title: '{{ .GroupLabels.alertname }}' text: '{{ range .Alerts }}{{ .Annotations.description }}{{ end }}' send_resolved: true # 产品团队 - name: 'product-team' email_configs: - to: 'product-team@example.com' headers: Subject: '[TestMaster PRODUCT] {{ .GroupLabels.alertname }}' slack_configs: - channel: '#product-alerts' title: '{{ .GroupLabels.alertname }}' text: '{{ range .Alerts }}{{ .Annotations.description }}{{ end }}' send_resolved: true # 严重告警 - name: 'critical-alerts' email_configs: - to: 'all-team@example.com' headers: Subject: '[CRITICAL] TestMaster {{ .GroupLabels.alertname }}' slack_configs: - channel: '#critical-alerts' title: '🚨 CRITICAL: {{ .GroupLabels.alertname }}' text: '{{ range .Alerts }}{{ .Annotations.description }}{{ end }}' send_resolved: true color: 'danger' webhook_configs: - url: 'https://outlook.office.com/webhook/YOUR/TEAMS/WEBHOOK' send_resolved: true # PagerDuty 集成(可选) # - url: 'https://events.pagerduty.com/v2/enqueue' # send_resolved: true --- apiVersion: v1 kind: Service metadata: name: testmaster-alertmanager namespace: testmaster labels: app: testmaster component: alertmanager spec: type: ClusterIP ports: - port: 9093 targetPort: 9093 protocol: TCP name: alertmanager selector: app: testmaster component: alertmanager --- apiVersion: apps/v1 kind: StatefulSet metadata: name: testmaster-alertmanager namespace: testmaster labels: app: testmaster component: alertmanager spec: serviceName: testmaster-alertmanager replicas: 3 selector: matchLabels: app: testmaster component: 
alertmanager template: metadata: labels: app: testmaster component: alertmanager spec: containers: - name: alertmanager image: prom/alertmanager:latest imagePullPolicy: IfNotPresent args: - '--config.file=/etc/alertmanager/alertmanager.yml' - '--storage.path=/alertmanager' - '--cluster.advertise-address=$(POD_IP):9094' - '--cluster.listen-address=0.0.0.0:9094' - '--cluster.peer=testmaster-alertmanager-0.testmaster-alertmanager:9094' - '--cluster.peer=testmaster-alertmanager-1.testmaster-alertmanager:9094' - '--cluster.peer=testmaster-alertmanager-2.testmaster-alertmanager:9094' ports: - containerPort: 9093 name: alertmanager - containerPort: 9094 name: cluster env: - name: POD_IP valueFrom: fieldRef: fieldPath: status.podIP volumeMounts: - name: alertmanager-config mountPath: /etc/alertmanager - name: alertmanager-data mountPath: /alertmanager resources: requests: cpu: 250m memory: 256Mi limits: cpu: 500m memory: 512Mi livenessProbe: httpGet: path: /-/healthy port: 9093 initialDelaySeconds: 30 periodSeconds: 10 timeoutSeconds: 5 failureThreshold: 3 readinessProbe: httpGet: path: /-/ready port: 9093 initialDelaySeconds: 10 periodSeconds: 5 timeoutSeconds: 3 failureThreshold: 3 volumes: - name: alertmanager-config configMap: name: testmaster-alertmanager-config volumeClaimTemplates: - metadata: name: alertmanager-data spec: accessModes: ["ReadWriteOnce"] storageClassName: testmaster-standard resources: requests: storage: 10Gi
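The routing tree in `alertmanager.yml` fans alerts out by their `category` label, with `severity: critical` alerts additionally hitting the `critical-alerts` receiver. The category-to-receiver mapping can be sketched as a plain lookup for quick sanity checks; `receiver_for_category` is a hypothetical helper that mirrors the route table above, not project code:

```shell
# Map an alert's category label to its primary Alertmanager receiver,
# mirroring the routes in alertmanager.yml. Unmatched categories fall
# through to the 'default' receiver, as in the route's top level.
receiver_for_category() {
    case "$1" in
        system)      echo "ops-team" ;;
        application) echo "dev-team" ;;
        database)    echo "dba-team" ;;
        business)    echo "product-team" ;;
        *)           echo "default" ;;
    esac
}
```

Because every route sets `continue: true`, a single alert can match several routes (e.g. a critical database alert notifies both `dba-team` and `critical-alerts`); this lookup only captures the first, category-based hop.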

2.9.3 Grafana Dashboards Configuration

2.9.3.1 System Overview Dashboard (k8s/monitoring/grafana-dashboards/system-overview.json)
{ "dashboard": { "title": "TestMaster - 系统概览", "tags": ["testmaster", "overview"], "timezone": "browser", "schemaVersion": 16, "version": 1, "refresh": "30s", "panels": [ { "id": 1, "title": "系统健康状态", "type": "stat", "gridPos": {"x": 0, "y": 0, "w": 6, "h": 4}, "targets": [ { "expr": "up{job=~\"testmaster-.*\"}", "legendFormat": "{{ job }}", "refId": "A" } ], "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "value_and_name" }, "fieldConfig": { "defaults": { "mappings": [ {"type": "value", "value": "1", "text": "运行中"}, {"type": "value", "value": "0", "text": "离线"} ], "thresholds": { "mode": "absolute", "steps": [ {"color": "red", "value": null}, {"color": "green", "value": 1} ] } } } }, { "id": 2, "title": "CPU 使用率", "type": "graph", "gridPos": {"x": 6, "y": 0, "w": 9, "h": 4}, "targets": [ { "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"testmaster\"}[5m])) by (pod)", "legendFormat": "{{ pod }}", "refId": "A" } ], "yaxes": [ {"format": "percentunit", "label": "CPU使用率"}, {"format": "short"} ] }, { "id": 3, "title": "内存使用率", "type": "graph", "gridPos": {"x": 15, "y": 0, "w": 9, "h": 4}, "targets": [ { "expr": "sum(container_memory_working_set_bytes{namespace=\"testmaster\"}) by (pod) / sum(container_spec_memory_limit_bytes{namespace=\"testmaster\"}) by (pod)", "legendFormat": "{{ pod }}", "refId": "A" } ], "yaxes": [ {"format": "percentunit", "label": "内存使用率"}, {"format": "short"} ] }, { "id": 4, "title": "请求速率", "type": "graph", "gridPos": {"x": 0, "y": 4, "w": 12, "h": 6}, "targets": [ { "expr": "sum(rate(http_requests_total{namespace=\"testmaster\"}[5m])) by (service)", "legendFormat": "{{ service }}", "refId": "A" } ], "yaxes": [ {"format": "reqps", "label": "请求/秒"}, {"format": "short"} ] }, { "id": 5, "title": "错误率", "type": "graph", "gridPos": {"x": 12, "y": 4, "w": 12, "h": 6}, "targets": [ { 
"expr": "sum(rate(http_requests_total{namespace=\"testmaster\",status=~\"5..\"}[5m])) by (service) / sum(rate(http_requests_total{namespace=\"testmaster\"}[5m])) by (service)", "legendFormat": "{{ service }}", "refId": "A" } ], "yaxes": [ {"format": "percentunit", "label": "错误率"}, {"format": "short"} ], "alert": { "conditions": [ { "evaluator": {"params": [0.05], "type": "gt"}, "operator": {"type": "and"}, "query": {"params": ["A", "5m", "now"]}, "reducer": {"params": [], "type": "avg"}, "type": "query" } ], "executionErrorState": "alerting", "frequency": "1m", "handler": 1, "name": "高错误率告警", "noDataState": "no_data", "notifications": [] } }, { "id": 6, "title": "响应时间 (P95)", "type": "graph", "gridPos": {"x": 0, "y": 10, "w": 12, "h": 6}, "targets": [ { "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{namespace=\"testmaster\"}[5m])) by (le, service))", "legendFormat": "{{ service }}", "refId": "A" } ], "yaxes": [ {"format": "s", "label": "响应时间"}, {"format": "short"} ] }, { "id": 7, "title": "Pod 状态", "type": "table", "gridPos": {"x": 12, "y": 10, "w": 12, "h": 6}, "targets": [ { "expr": "kube_pod_status_phase{namespace=\"testmaster\"}", "format": "table", "instant": true, "refId": "A" } ], "transformations": [ { "id": "organize", "options": { "excludeByName": {}, "indexByName": {}, "renameByName": { "pod": "Pod名称", "phase": "状态", "Value": "值" } } } ] } ] } }
2.9.3.2 Application Performance Dashboard (k8s/monitoring/grafana-dashboards/application-performance.json)
{ "dashboard": { "title": "TestMaster - 应用性能", "tags": ["testmaster", "performance"], "timezone": "browser", "schemaVersion": 16, "version": 1, "refresh": "30s", "panels": [ { "id": 1, "title": "Gateway 请求速率", "type": "graph", "gridPos": {"x": 0, "y": 0, "w": 12, "h": 6}, "targets": [ { "expr": "sum(rate(http_requests_total{service=\"gateway\"}[5m])) by (method, path)", "legendFormat": "{{ method }} {{ path }}", "refId": "A" } ] }, { "id": 2, "title": "Gateway 响应时间分布", "type": "heatmap", "gridPos": {"x": 12, "y": 0, "w": 12, "h": 6}, "targets": [ { "expr": "sum(rate(http_request_duration_seconds_bucket{service=\"gateway\"}[5m])) by (le)", "format": "heatmap", "legendFormat": "{{ le }}", "refId": "A" } ] }, { "id": 3, "title": "AI Generator 性能", "type": "graph", "gridPos": {"x": 0, "y": 6, "w": 8, "h": 6}, "targets": [ { "expr": "rate(ai_generations_total{service=\"ai-generator\"}[5m])", "legendFormat": "生成速率", "refId": "A" }, { "expr": "rate(ai_generations_total{service=\"ai-generator\",status=\"success\"}[5m])", "legendFormat": "成功率", "refId": "B" } ] }, { "id": 4, "title": "Executor 执行统计", "type": "graph", "gridPos": {"x": 8, "y": 6, "w": 8, "h": 6}, "targets": [ { "expr": "test_executions_running", "legendFormat": "运行中", "refId": "A" }, { "expr": "test_executions_queued", "legendFormat": "队列中", "refId": "B" } ] }, { "id": 5, "title": "性能测试并发数", "type": "graph", "gridPos": {"x": 16, "y": 6, "w": 8, "h": 6}, "targets": [ { "expr": "performance_test_users", "legendFormat": "虚拟用户数", "refId": "A" } ] }, { "id": 6, "title": "数据库连接池", "type": "graph", "gridPos": {"x": 0, "y": 12, "w": 12, "h": 6}, "targets": [ { "expr": "pg_stat_activity_count", "legendFormat": "PostgreSQL 连接数", "refId": "A" }, { "expr": "mongodb_connections{state=\"current\"}", "legendFormat": "MongoDB 连接数", "refId": "B" }, { "expr": "redis_connected_clients", "legendFormat": "Redis 连接数", "refId": "C" } ] }, { "id": 7, "title": "消息队列深度", "type": "graph", "gridPos": {"x": 12, "y": 12, "w": 12, "h": 6}, 
"targets": [ { "expr": "rabbitmq_queue_messages", "legendFormat": "{{ queue }}", "refId": "A" } ] } ] } }


2.9.3.3 Business Metrics Dashboard (k8s/monitoring/grafana-dashboards/business-metrics.json)
{ "dashboard": { "title": "TestMaster - 业务指标", "tags": ["testmaster", "business"], "timezone": "browser", "schemaVersion": 16, "version": 1, "refresh": "1m", "panels": [ { "id": 1, "title": "测试执行统计", "type": "stat", "gridPos": {"x": 0, "y": 0, "w": 6, "h": 4}, "targets": [ { "expr": "sum(increase(test_executions_total[24h]))", "legendFormat": "24小时执行总数", "refId": "A" } ], "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "center", "orientation": "auto" } }, { "id": 2, "title": "测试成功率", "type": "gauge", "gridPos": {"x": 6, "y": 0, "w": 6, "h": 4}, "targets": [ { "expr": "sum(rate(test_executions_total{status=\"success\"}[1h])) / sum(rate(test_executions_total[1h]))", "refId": "A" } ], "options": { "showThresholdLabels": false, "showThresholdMarkers": true }, "fieldConfig": { "defaults": { "unit": "percentunit", "min": 0, "max": 1, "thresholds": { "mode": "absolute", "steps": [ {"color": "red", "value": null}, {"color": "yellow", "value": 0.7}, {"color": "green", "value": 0.9} ] } } } }, { "id": 3, "title": "AI 生成统计", "type": "stat", "gridPos": {"x": 12, "y": 0, "w": 6, "h": 4}, "targets": [ { "expr": "sum(increase(ai_generations_total[24h]))", "legendFormat": "24小时生成总数", "refId": "A" } ], "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "center", "orientation": "auto" } }, { "id": 4, "title": "AI 生成成功率", "type": "gauge", "gridPos": {"x": 18, "y": 0, "w": 6, "h": 4}, "targets": [ { "expr": "sum(rate(ai_generations_total{status=\"success\"}[1h])) / sum(rate(ai_generations_total[1h]))", "refId": "A" } ], "options": { "showThresholdLabels": false, "showThresholdMarkers": true }, "fieldConfig": { "defaults": { "unit": "percentunit", "min": 0, "max": 1, "thresholds": { "mode": "absolute", "steps": [ {"color": "red", "value": null}, {"color": "yellow", "value": 0.8}, {"color": "green", "value": 0.95} ] } } } }, { "id": 5, "title": "测试执行趋势", "type": "graph", "gridPos": {"x": 0, "y": 4, "w": 12, "h": 6}, "targets": [ { "expr": 
"sum(rate(test_executions_total[5m])) by (status)", "legendFormat": "{{ status }}", "refId": "A" } ], "yaxes": [ {"format": "short", "label": "执行/秒"}, {"format": "short"} ] }, { "id": 6, "title": "AI 生成趋势", "type": "graph", "gridPos": {"x": 12, "y": 4, "w": 12, "h": 6}, "targets": [ { "expr": "sum(rate(ai_generations_total[5m])) by (model)", "legendFormat": "{{ model }}", "refId": "A" } ], "yaxes": [ {"format": "short", "label": "生成/秒"}, {"format": "short"} ] }, { "id": 7, "title": "测试执行时间分布", "type": "heatmap", "gridPos": {"x": 0, "y": 10, "w": 12, "h": 6}, "targets": [ { "expr": "sum(rate(test_execution_duration_seconds_bucket[5m])) by (le)", "format": "heatmap", "legendFormat": "{{ le }}", "refId": "A" } ] }, { "id": 8, "title": "并发测试数量", "type": "graph", "gridPos": {"x": 12, "y": 10, "w": 12, "h": 6}, "targets": [ { "expr": "test_executions_running", "legendFormat": "运行中", "refId": "A" }, { "expr": "test_executions_queued", "legendFormat": "队列中", "refId": "B" }, { "expr": "test_executions_total - test_executions_running - test_executions_queued", "legendFormat": "已完成", "refId": "C" } ] }, { "id": 9, "title": "用户活跃度", "type": "graph", "gridPos": {"x": 0, "y": 16, "w": 8, "h": 6}, "targets": [ { "expr": "count(count by (user_id) (user_activity{action=\"login\"}[1h]))", "legendFormat": "活跃用户数", "refId": "A" } ] }, { "id": 10, "title": "项目统计", "type": "stat", "gridPos": {"x": 8, "y": 16, "w": 8, "h": 6}, "targets": [ { "expr": "count(project_info)", "legendFormat": "项目总数", "refId": "A" }, { "expr": "count(project_info{status=\"active\"})", "legendFormat": "活跃项目", "refId": "B" } ] }, { "id": 11, "title": "测试用例统计", "type": "piechart", "gridPos": {"x": 16, "y": 16, "w": 8, "h": 6}, "targets": [ { "expr": "sum(test_cases_total) by (type)", "legendFormat": "{{ type }}", "refId": "A" } ], "options": { "legend": { "displayMode": "table", "placement": "right", "values": ["value", "percent"] }, "pieType": "donut" } } ] } }
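The test success-rate gauge above colors the value red below 0.7, yellow from 0.7, and green from 0.9 (the AI-generation gauge uses 0.8/0.95 instead). Grafana evaluates `thresholds.steps` as "last step whose `value` the reading meets or exceeds"; the logic can be sketched as follows, purely as an illustration of those semantics, not project code:

```shell
# Map a success ratio (0..1) to the gauge color, matching the
# thresholds steps [red, yellow@0.7, green@0.9] in the dashboard JSON.
success_gauge_color() {
    awk -v r="$1" 'BEGIN {
        if (r >= 0.9)      print "green"
        else if (r >= 0.7) print "yellow"
        else               print "red"
    }'
}
```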
2.9.3.4 Grafana Dashboard ConfigMap (k8s/monitoring/grafana-dashboards.yaml)
```yaml
# TestMaster Automated Testing Platform - Grafana Dashboards ConfigMap
# Version: 1.0.0
apiVersion: v1
kind: ConfigMap
metadata:
  name: testmaster-grafana-dashboards
  namespace: testmaster
  labels:
    app: testmaster
    component: grafana
    grafana_dashboard: "1"
data:
  system-overview.json: |
    # Place the full contents of system-overview.json here
  application-performance.json: |
    # Place the full contents of application-performance.json here
  business-metrics.json: |
    # Place the full contents of business-metrics.json here
  database-monitoring.json: |
    {
      "dashboard": {
        "title": "TestMaster - Database Monitoring",
        "tags": ["testmaster", "database"],
        "panels": [
          {
            "id": 1,
            "title": "PostgreSQL Connections",
            "type": "graph",
            "targets": [
              { "expr": "pg_stat_activity_count", "legendFormat": "active connections" }
            ]
          },
          {
            "id": 2,
            "title": "PostgreSQL Query Performance",
            "type": "graph",
            "targets": [
              { "expr": "rate(pg_stat_statements_mean_exec_time[5m])", "legendFormat": "mean query time" }
            ]
          },
          {
            "id": 3,
            "title": "MongoDB Operation Rate",
            "type": "graph",
            "targets": [
              { "expr": "rate(mongodb_op_counters_total[5m])", "legendFormat": "{{ type }}" }
            ]
          },
          {
            "id": 4,
            "title": "Redis Commands Processed",
            "type": "graph",
            "targets": [
              { "expr": "rate(redis_commands_processed_total[5m])", "legendFormat": "commands/sec" }
            ]
          }
        ]
      }
    }
  selenium-grid.json: |
    {
      "dashboard": {
        "title": "TestMaster - Selenium Grid",
        "tags": ["testmaster", "selenium"],
        "panels": [
          {
            "id": 1,
            "title": "Selenium Node Status",
            "type": "stat",
            "targets": [
              { "expr": "selenium_grid_node_count", "legendFormat": "available nodes" }
            ]
          },
          {
            "id": 2,
            "title": "Active Sessions",
            "type": "graph",
            "targets": [
              { "expr": "selenium_grid_active_sessions", "legendFormat": "{{ browser }}" }
            ]
          },
          {
            "id": 3,
            "title": "Session Queue",
            "type": "graph",
            "targets": [
              { "expr": "selenium_grid_session_queue_size", "legendFormat": "waiting" }
            ]
          },
          {
            "id": 4,
            "title": "Browser Distribution",
            "type": "piechart",
            "targets": [
              { "expr": "sum(selenium_grid_sessions_total) by (browser)", "legendFormat": "{{ browser }}" }
            ]
          }
        ]
      }
    }
---
# Grafana data source provisioning
apiVersion: v1
kind: ConfigMap
metadata:
  name: testmaster-grafana-datasources
  namespace: testmaster
  labels:
    app: testmaster
    component: grafana
data:
  datasources.yaml: |
    apiVersion: 1
    datasources:
      - name: Prometheus
        type: prometheus
        access: proxy
        url: http://testmaster-prometheus:9090
        isDefault: true
        editable: true
        jsonData:
          timeInterval: "15s"
          queryTimeout: "60s"
          httpMethod: "POST"
      - name: Loki
        type: loki
        access: proxy
        url: http://testmaster-loki:3100
        editable: true
        jsonData:
          maxLines: 1000
      - name: PostgreSQL
        type: postgres
        access: proxy
        url: testmaster-postgres:5432
        database: testmaster
        user: testmaster
        secureJsonData:
          password: ${POSTGRES_PASSWORD}
        jsonData:
          sslmode: disable
          postgresVersion: 1500
      - name: MongoDB
        type: grafana-mongodb-datasource
        access: proxy
        url: mongodb://testmaster-mongodb:27017
        database: testmaster
        secureJsonData:
          password: ${MONGODB_PASSWORD}
---
# Updated Grafana StatefulSet that mounts the dashboards
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: testmaster-grafana
  namespace: testmaster
  labels:
    app: testmaster
    component: grafana
spec:
  serviceName: testmaster-grafana
  replicas: 1
  selector:
    matchLabels:
      app: testmaster
      component: grafana
  template:
    metadata:
      labels:
        app: testmaster
        component: grafana
    spec:
      containers:
        - name: grafana
          image: grafana/grafana:latest
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 3000
              name: grafana
          env:
            - name: GF_SECURITY_ADMIN_USER
              value: admin
            - name: GF_SECURITY_ADMIN_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-app-secrets
                  key: GRAFANA_ADMIN_PASSWORD
            - name: GF_INSTALL_PLUGINS
              value: grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel,grafana-mongodb-datasource
            - name: GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH
              value: /var/lib/grafana/dashboards/system-overview.json
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-db-secrets
                  key: POSTGRES_PASSWORD
            - name: MONGODB_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: testmaster-db-secrets
                  key: MONGODB_PASSWORD
          volumeMounts:
            - name: grafana-data
              mountPath: /var/lib/grafana
            - name: grafana-datasources
              mountPath: /etc/grafana/provisioning/datasources
            - name: grafana-dashboards-config
              mountPath: /etc/grafana/provisioning/dashboards
            - name: grafana-dashboards
              mountPath: /var/lib/grafana/dashboards
          resources:
            requests:
              cpu: 250m
              memory: 512Mi
            limits:
              cpu: 1000m
              memory: 1Gi
          livenessProbe:
            httpGet:
              path: /api/health
              port: 3000
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /api/health
              port: 3000
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
      volumes:
        - name: grafana-datasources
          configMap:
            name: testmaster-grafana-datasources
        - name: grafana-dashboards-config
          configMap:
            name: testmaster-grafana-dashboard-config
        - name: grafana-dashboards
          configMap:
            name: testmaster-grafana-dashboards
  volumeClaimTemplates:
    - metadata:
        name: grafana-data
      spec:
        accessModes: ["ReadWriteOnce"]
        storageClassName: testmaster-standard
        resources:
          requests:
            storage: 10Gi
---
# Dashboard provider config so Grafana auto-loads dashboards from disk
apiVersion: v1
kind: ConfigMap
metadata:
  name: testmaster-grafana-dashboard-config
  namespace: testmaster
  labels:
    app: testmaster
    component: grafana
data:
  dashboards.yaml: |
    apiVersion: 1
    providers:
      - name: 'TestMaster Dashboards'
        orgId: 1
        folder: 'TestMaster'
        type: file
        disableDeletion: false
        updateIntervalSeconds: 30
        allowUiUpdates: true
        options:
          path: /var/lib/grafana/dashboards
```
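A common provisioning pitfall is a dashboard entry in the ConfigMap that is not valid JSON: Grafana's file provider will skip or reject it with little feedback. A minimal pre-apply sanity check can be sketched in Python; the sample payloads below are trimmed, hypothetical stand-ins for the ConfigMap's `data:` entries.

```python
import json

# Hypothetical, trimmed stand-ins for the ConfigMap's data entries.
dashboard_payloads = {
    "database-monitoring.json": '{"dashboard": {"title": "TestMaster - Database Monitoring"}}',
    "selenium-grid.json": '{"dashboard": {"title": "TestMaster - Selenium Grid"}}',
}

def validate_dashboards(payloads: dict) -> list:
    """Return the names of entries that fail to parse as JSON."""
    bad = []
    for name, body in payloads.items():
        try:
            json.loads(body)
        except json.JSONDecodeError:
            bad.append(name)
    return bad

print(validate_dashboards(dashboard_payloads))  # → []
```

Running such a check in CI before `kubectl apply` catches a truncated or hand-edited dashboard long before Grafana silently drops it.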

2.9.4 Monitoring Deployment Scripts

2.9.4.1 Monitoring system deployment script: `k8s/monitoring/deploy-monitoring.sh`
```bash
#!/bin/bash
# TestMaster Automated Testing Platform - monitoring stack deployment script
# Version: 1.0.0

set -e

# Colored output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

print_message() {
    local color=$1
    local message=$2
    echo -e "${color}${message}${NC}"
}

print_header() {
    echo ""
    echo "============================================================================"
    echo "  $1"
    echo "============================================================================"
    echo ""
}

# Deploy Prometheus
deploy_prometheus() {
    print_message $YELLOW "📊 Deploying Prometheus..."

    # Apply the Prometheus configuration
    kubectl apply -f prometheus-rules.yaml
    kubectl apply -f prometheus.yaml

    # Wait until Prometheus is ready
    print_message $YELLOW "⏳ Waiting for Prometheus to become ready..."
    kubectl wait --for=condition=ready pod -l component=prometheus -n testmaster --timeout=300s

    print_message $GREEN "✅ Prometheus deployed"
}

# Deploy Alertmanager
deploy_alertmanager() {
    print_message $YELLOW "🔔 Deploying Alertmanager..."

    kubectl apply -f alertmanager.yaml

    # Wait until Alertmanager is ready
    print_message $YELLOW "⏳ Waiting for Alertmanager to become ready..."
    kubectl wait --for=condition=ready pod -l component=alertmanager -n testmaster --timeout=300s

    print_message $GREEN "✅ Alertmanager deployed"
}

# Deploy Grafana
deploy_grafana() {
    print_message $YELLOW "📈 Deploying Grafana..."

    # Apply the Grafana configuration
    kubectl apply -f grafana-dashboards.yaml

    # Wait until Grafana is ready
    print_message $YELLOW "⏳ Waiting for Grafana to become ready..."
    kubectl wait --for=condition=ready pod -l component=grafana -n testmaster --timeout=300s

    print_message $GREEN "✅ Grafana deployed"
}

# Configure alert rules
configure_alerts() {
    print_message $YELLOW "⚙️ Configuring alert rules..."

    # Hot-reload the Prometheus configuration
    kubectl exec -n testmaster testmaster-prometheus-0 -- \
        curl -X POST http://localhost:9090/-/reload

    print_message $GREEN "✅ Alert rules configured"
}

# Import Grafana dashboards
import_dashboards() {
    print_message $YELLOW "📊 Importing Grafana dashboards..."

    # Give Grafana a moment to finish starting up
    sleep 10

    # Grafana auto-loads the dashboards provisioned via the ConfigMap
    print_message $GREEN "✅ Dashboards imported"
}

# Print access information
show_access_info() {
    print_header "Monitoring stack access information"

    # Resolve the Grafana LoadBalancer address (IP first, then hostname)
    GRAFANA_IP=$(kubectl get svc testmaster-grafana -n testmaster -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
    if [ -z "$GRAFANA_IP" ]; then
        GRAFANA_IP=$(kubectl get svc testmaster-grafana -n testmaster -o jsonpath='{.status.loadBalancer.ingress[0].hostname}')
    fi
    if [ -z "$GRAFANA_IP" ]; then
        GRAFANA_IP="<pending>"
    fi

    echo "📈 Grafana:"
    echo "   URL: http://$GRAFANA_IP:3000"
    echo "   Username: admin"
    echo "   Password: admin (change it after first login)"
    echo ""
    echo "📊 Prometheus:"
    echo "   URL: http://testmaster-prometheus:9090 (in-cluster)"
    echo "   Port forward: kubectl port-forward -n testmaster svc/testmaster-prometheus 9090:9090"
    echo ""
    echo "🔔 Alertmanager:"
    echo "   URL: http://testmaster-alertmanager:9093 (in-cluster)"
    echo "   Port forward: kubectl port-forward -n testmaster svc/testmaster-alertmanager 9093:9093"
    echo ""
}

# Fire a test alert
test_alerts() {
    print_message $YELLOW "🧪 Testing the alerting pipeline..."

    # Create a pod that fails immediately to trigger an alert
    cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: test-alert-pod
  namespace: testmaster
  labels:
    app: test-alert
spec:
  containers:
    - name: test
      image: busybox
      command: ['sh', '-c', 'exit 1']
  restartPolicy: Never
EOF

    sleep 5
    print_message $GREEN "✅ Test alert fired; check Alertmanager"

    # Clean up the test pod
    kubectl delete pod test-alert-pod -n testmaster --ignore-not-found=true
}

# Entry point
main() {
    print_header "TestMaster monitoring stack deployment"

    local command=${1:-deploy}

    case $command in
        deploy)
            deploy_prometheus
            deploy_alertmanager
            deploy_grafana
            configure_alerts
            import_dashboards
            show_access_info
            print_header "Monitoring stack deployment finished"
            print_message $GREEN "✅ Monitoring stack deployed successfully"
            ;;
        test)
            test_alerts
            ;;
        reload)
            configure_alerts
            print_message $GREEN "✅ Configuration reloaded"
            ;;
        status)
            kubectl get pods -n testmaster -l 'component in (prometheus,alertmanager,grafana)'
            ;;
        logs)
            local component=${2:-prometheus}
            kubectl logs -f -l component=$component -n testmaster
            ;;
        *)
            echo "Usage: $0 {deploy|test|reload|status|logs} [component]"
            exit 1
            ;;
    esac
}

main "$@"
```
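The address-resolution fallback in `show_access_info` (prefer the LoadBalancer IP, fall back to the hostname, otherwise report `<pending>`) can be sketched in isolation. The `ingress` dict below is a hypothetical stand-in for the `status.loadBalancer.ingress[0]` object that the `kubectl ... -o jsonpath` calls read:

```python
def resolve_grafana_address(ingress: dict) -> str:
    """Mimic the script's fallback chain: ip → hostname → '<pending>'."""
    return ingress.get("ip") or ingress.get("hostname") or "<pending>"

# Hypothetical status.loadBalancer.ingress[0] payloads:
print(resolve_grafana_address({"ip": "203.0.113.10"}))          # → 203.0.113.10
print(resolve_grafana_address({"hostname": "lb.example.com"}))  # → lb.example.com
print(resolve_grafana_address({}))                              # → <pending>
```

The empty-dict case matters in practice: on clusters without a LoadBalancer controller the Service never gets an ingress entry, which is exactly when the script prints `<pending>`.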

2.9.5 Monitoring system user guide: `docs/monitoring-guide.md`

# TestMaster Monitoring Guide

## 1. Overview

The TestMaster monitoring stack is built on Prometheus + Grafana + Alertmanager and provides end-to-end system monitoring and alerting.

## 2. Monitoring Architecture

```
┌─────────────────────────────────────────────────────────────┐
│                    Monitoring data flow                     │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│  App services ──► Prometheus ──► Grafana (visualization)    │
│       │               │                                     │
│       │               └──► Alertmanager ──► notifications   │
│       │                                                     │
│       └──► logs ──► Loki ──► Grafana (log queries)          │
│                                                             │
└─────────────────────────────────────────────────────────────┘
```

## 3. Monitoring Metrics

### 3.1 System-level metrics

- **Pod status**: running state, restart counts
- **Resource usage**: CPU, memory, disk, network
- **Node status**: node health, resource pressure

### 3.2 Application-level metrics

- **Request rate**: QPS, TPS
- **Response time**: P50, P95, P99
- **Error rate**: 4xx and 5xx errors
- **Concurrency**: active connections, thread pools

### 3.3 Business metrics

- **Test execution**: total runs, success rate, failure rate
- **AI generation**: total generations, success rate, model distribution
- **User activity**: logged-in users, active projects
- **Resource consumption**: test duration, resource usage

### 3.4 Database metrics

- **Connection pool**: active connections, idle connections
- **Query performance**: slow queries, query time
- **Cache**: hit rate, eviction rate

## 4. Dashboards

### 4.1 System Overview dashboard

- **Purpose**: a quick read on overall system health
- **Key metrics**:
  - Service health status
  - CPU/memory utilization
  - Request and error rates
  - Pod status distribution

### 4.2 Application Performance dashboard

- **Purpose**: monitor application performance and response times
- **Key metrics**:
  - Per-service request rates
  - Response-time distribution
  - Database connection-pool state
  - Message-queue depth

### 4.3 Business Metrics dashboard

- **Purpose**: track key business indicators
- **Key metrics**:
  - Test-execution statistics
  - AI-generation statistics
  - User activity
  - Project and test-case distribution

### 4.4 Database Monitoring dashboard

- **Purpose**: monitor database performance
- **Key metrics**:
  - Connection counts and query performance
  - Cache hit rate
  - Slow-query analysis

## 5. Alert Rules

### 5.1 Critical alerts

- **PodNotReady**: a Pod has been unhealthy for more than 5 minutes
- **ServiceDown**: a service is completely unavailable
- **HighErrorRate**: the 5xx error rate exceeds 5%
- **DatabaseDown**: a database is unavailable

### 5.2 Warning alerts

- **HighCPUUsage**: CPU utilization above 80%
- **HighMemoryUsage**: memory utilization above 80%
- **HighResponseTime**: P95 response time above 2 seconds
- **RabbitMQQueueBacklog**: more than 10,000 messages queued

### 5.3 Business alerts

- **HighTestFailureRate**: test failure rate above 30%
- **HighAIGenerationFailureRate**: AI generation failure rate above 20%
- **LongTestExecutionTime**: P95 test duration above 10 minutes

## 6. Alert Notifications

### 6.1 Channels

- **Email**: sent to the team mailbox
- **Slack**: sent to a designated channel
- **Microsoft Teams**: sent to a Teams channel
- **Webhook**: custom webhook integrations

### 6.2 Grouping

Alerts are grouped along these dimensions:

- **category**: alert category (system, application, database, business)
- **severity**: severity level (critical, warning, info)
- **service**: service name

### 6.3 Inhibition

- When a node is unreachable, Pod alerts on that node are inhibited
- When a service is completely down, its high-error-rate alerts are inhibited

## 7. Usage Examples

### 7.1 Viewing live metrics

```bash
# Access Prometheus
kubectl port-forward -n testmaster svc/testmaster-prometheus 9090:9090

# Access Grafana
kubectl port-forward -n testmaster svc/testmaster-grafana 3000:3000
```

### 7.2 Example PromQL queries

```promql
# Gateway QPS
sum(rate(http_requests_total{service="gateway"}[5m]))

# P95 response time
histogram_quantile(0.95,
  sum(rate(http_request_duration_seconds_bucket[5m])) by (le, service)
)

# Test success rate
sum(rate(test_executions_total{status="success"}[1h]))
/
sum(rate(test_executions_total[1h]))
```
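`histogram_quantile` estimates a quantile by linearly interpolating across cumulative bucket counts (the `le` upper bounds of the `_bucket` series). A minimal Python sketch of that estimation — the bucket bounds and counts below are made up for illustration:

```python
def histogram_quantile(q, buckets):
    """Estimate quantile q from cumulative histogram buckets.

    buckets: list of (upper_bound, cumulative_count), sorted by bound,
    mirroring Prometheus *_bucket series with `le` labels.
    """
    total = buckets[-1][1]
    rank = q * total
    prev_bound, prev_count = 0.0, 0
    for bound, count in buckets:
        if count >= rank:
            if count == prev_count:
                return bound
            # Linear interpolation inside the bucket, as Prometheus does.
            return prev_bound + (bound - prev_bound) * (rank - prev_count) / (count - prev_count)
        prev_bound, prev_count = bound, count
    return buckets[-1][0]

# Illustrative buckets: le=0.1 → 50 requests, le=0.5 → 90, le=2.0 → 100
print(histogram_quantile(0.95, [(0.1, 50), (0.5, 90), (2.0, 100)]))  # ≈ 1.25
```

This also shows why bucket boundaries matter: the P95 here lands mid-bucket between 0.5s and 2.0s, so coarse buckets around your SLO threshold make the estimate coarse too.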

### 7.3 Adding a custom alert rule

Edit `prometheus-rules.yaml` and add the new rule:

```yaml
- alert: CustomAlert
  expr: your_metric > threshold
  for: 5m
  labels:
    severity: warning
    category: custom
  annotations:
    summary: "Custom alert"
    description: "Detailed description"
```
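The `for: 5m` clause means the expression must stay true for the full duration before the alert fires; until then it is only "pending". A toy Python sketch of that debounce, assuming one evaluation per interval (the sample values and threshold are illustrative):

```python
def firing_states(samples, threshold, for_intervals):
    """Return per-evaluation alert state: 'inactive', 'pending', or 'firing'.

    samples: metric values at successive evaluation intervals.
    for_intervals: how many consecutive breaches `for:` requires.
    """
    states, streak = [], 0
    for value in samples:
        streak = streak + 1 if value > threshold else 0
        if streak == 0:
            states.append("inactive")
        elif streak < for_intervals:
            states.append("pending")
        else:
            states.append("firing")
    return states

# Threshold 80, `for:` = 3 intervals: a short spike never fires.
print(firing_states([70, 85, 90, 75, 85, 90, 95], threshold=80, for_intervals=3))
# → ['inactive', 'pending', 'pending', 'inactive', 'pending', 'pending', 'firing']
```

Note how the dip back to 75 resets the streak: `for:` is what keeps transient spikes from paging anyone, at the cost of delaying genuine alerts by the same duration.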

Then reload the configuration:

```bash
kubectl exec -n testmaster testmaster-prometheus-0 -- \
  curl -X POST http://localhost:9090/-/reload
```

## 8. Troubleshooting

### 8.1 Prometheus is not scraping metrics

```bash
# Check that the Services exist
kubectl get svc -n testmaster

# Check that the Pods are running
kubectl get pods -n testmaster

# Inspect the Prometheus logs
kubectl logs -n testmaster testmaster-prometheus-0

# Check the Prometheus targets page
# (with a port-forward active, open http://localhost:9090/targets)
```

### 8.2 Grafana dashboards show no data

- Check the data source configuration: Grafana UI → Configuration → Data Sources
- Test the Prometheus connection with the "Test" button on the data source page
- Check the dashboard time range and make sure it covers the period you expect

### 8.3 Alerts are not firing

```bash
# Inspect the loaded alert rules
kubectl exec -n testmaster testmaster-prometheus-0 -- \
  wget -qO- http://localhost:9090/api/v1/rules

# Check the Alertmanager logs
kubectl logs -n testmaster testmaster-alertmanager-0

# Fire a test alert
kubectl apply -f test-alert.yaml
```
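The `/api/v1/rules` response is JSON, and eyeballing it gets tedious once there are many rule groups; a small script can pull out just the alerting rules whose state is not `inactive`. The payload below is a trimmed, hypothetical example of the response shape:

```python
import json

# Hypothetical, trimmed /api/v1/rules response.
payload = json.loads("""
{"status": "success", "data": {"groups": [
  {"name": "testmaster-system", "rules": [
    {"type": "alerting", "name": "PodNotReady", "state": "firing"},
    {"type": "alerting", "name": "HighCPUUsage", "state": "inactive"},
    {"type": "recording", "name": "job:up:avg"}
  ]}
]}}
""")

def active_alerts(resp):
    """Collect alerting rules that are pending or firing (skip recording rules)."""
    return [
        rule["name"]
        for group in resp["data"]["groups"]
        for rule in group["rules"]
        if rule.get("type") == "alerting" and rule.get("state") != "inactive"
    ]

print(active_alerts(payload))  # → ['PodNotReady']
```

If a rule you expect to fire never leaves `inactive`, the problem is usually the expression or threshold; if it reaches `firing` but no notification arrives, look at Alertmanager routing instead.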

## 9. Best Practices

### 9.1 Metric design

- Use meaningful metric names
- Add appropriate labels
- Avoid high-cardinality labels
- Regularly prune unused metrics
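"High-cardinality labels" hurt because the number of time series for a metric is the product of its label cardinalities, so one unbounded label (say, a per-user ID) multiplies everything else. A small sketch of that arithmetic, with made-up label sets:

```python
from math import prod

def series_count(label_cardinalities: dict) -> int:
    """Worst-case time-series count for one metric name."""
    return prod(label_cardinalities.values()) if label_cardinalities else 1

# A well-scoped metric: a few services, methods, and status codes.
print(series_count({"service": 6, "method": 5, "status": 8}))  # → 240

# The same metric with a per-user label explodes.
print(series_count({"service": 6, "method": 5, "status": 8, "user_id": 10000}))  # → 2400000
```

This is why IDs, request paths with embedded parameters, and similar unbounded values belong in logs or traces, not in Prometheus labels.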

### 9.2 Alert rule design

- Set sensible thresholds
- Avoid alert storms
- Use alert inhibition
- Review alert rules regularly

### 9.3 Dashboard design

- Put key metrics at the top
- Use chart types that fit the data
- Add explanatory notes
- Update and refine dashboards regularly

## 10. References

- Prometheus official documentation
- Grafana official documentation
- Alertmanager official documentation
- PromQL query language reference
## Monitoring Configuration Summary

### ✅ A complete monitoring stack

#### 1. Prometheus monitoring

- ✅ Full alert-rule coverage (system, application, resources, databases, message queue, Selenium, business, storage)
- ✅ Multi-dimensional metric collection
- ✅ Automatic service discovery
- ✅ RBAC configuration

#### 2. Alertmanager alerting

- ✅ Multi-channel notifications (Email, Slack, Teams, Webhook)
- ✅ Alert grouping and routing
- ✅ Inhibition rules
- ✅ Highly available deployment (3 replicas)

#### 3. Grafana visualization

- ✅ 5 purpose-built dashboards (system overview, application performance, business metrics, database, Selenium)
- ✅ Auto-loaded dashboards
- ✅ Multiple data sources
- ✅ A rich set of panel types

#### 4. Metric coverage

- ✅ System level (Pods, nodes, resources)
- ✅ Application level (QPS, response time, error rate)
- ✅ Business level (test execution, AI generation, user activity)
- ✅ Databases (connection pools, query performance, caching)
- ✅ Middleware (message queue, object storage)
- ✅ Selenium Grid (nodes, sessions, queue)

#### 5. Automation scripts

- ✅ Monitoring deployment script
- ✅ Alert test script
- ✅ Configuration reload script

#### 6. Documentation

- ✅ Full user guide
- ✅ Troubleshooting handbook
- ✅ Best practices