---
# OpenAPI description of the MedScrub Platform HTTP API.
openapi: 3.0.3

# Document metadata: title, version, ownership, and the long-form overview.
info:
  title: MedScrub Platform API
  # Quoted so the version is always loaded as a string.
  version: '1.0.0'
  license:
    url: https://medscrub.ai/terms
    name: Proprietary
  contact:
    email: support@medscrub.ai
    name: MedScrub Support
    url: https://medscrub.ai
  # Markdown overview: authentication methods, rate-limit headers, CORS origins.
  description: |
    MedScrub Platform API for healthcare data de-identification, API key management, and analytics.

    ## Authentication

    The API supports two authentication methods:

    1. **JWT Token** (Recommended)
       ```
       Authorization: Bearer <jwt_token>
       ```
       - Get your JWT token from https://medscrub.ai/demo
       - Tokens expire after 24 hours
       - Rate limit: 100 requests/hour per user

    2. **API Key** (Fallback)
       ```
       X-API-Key: msk_...
       ```
       - Manage your API keys at https://medscrub.ai/dashboard/keys
       - Rate limit: Based on subscription tier

    ## Rate Limiting

    All authenticated endpoints are rate limited. Rate limit headers are included in responses:

    - `X-RateLimit-Limit`: Maximum requests allowed
    - `X-RateLimit-Remaining`: Remaining requests in current window
    - `X-RateLimit-Reset`: Time when rate limit resets (Unix timestamp)

    ## CORS

    The API supports CORS for web applications. Allowed origins:
    - https://medscrub.ai
    - https://app.medscrub.ai
    - http://localhost:3000 (development only)

# Base URLs against which the entries under `paths` are resolved.
servers:
  # Hosted deployment.
  - description: Production server
    url: 'https://medscrub.ai/api'
  # Local instance.
  - description: Development server
    url: 'http://localhost:3000/api'

# Groups used to organise operations in generated documentation.
tags:
  - description: Health check endpoints
    name: Health
  - description: API key validation and JWT token management
    name: Authentication
  - description: PHI de-identification and re-identification
    name: De-identification
  - description: Waitlist management
    name: Waitlist
  # NOTE(review): no operation visible in this document uses the Analytics tag.
  - description: Usage analytics and statistics
    name: Analytics
  - description: User profile and usage management
    name: User

paths:
  # Overall platform health; no `security` requirement is declared here.
  /health:
    get:
      operationId: getHealth
      tags: [Health]
      summary: Platform health check
      description: Returns the health status of the MedScrub Platform
      responses:
        '200':
          description: Platform is healthy
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HealthResponse'
              # Sample payload carrying the three required HealthResponse fields.
              example:
                status: healthy
                timestamp: '2024-10-19T12:00:00Z'
                service: MedScrub Platform

  # Database connectivity check; no `security` requirement is declared here.
  /health/db:
    get:
      operationId: getHealthDb
      tags: [Health]
      summary: Database health check
      description: Checks the health of the database connection
      responses:
        # The success body is declared inline rather than as a shared component.
        '200':
          description: Database is healthy
          content:
            application/json:
              schema:
                type: object
                properties:
                  status:
                    type: string
                    enum:
                      - healthy
                      - unhealthy
                  database:
                    type: string
                    enum:
                      - connected
                      - disconnected
                  timestamp:
                    type: string
                    format: date-time
        # Failure uses the shared error envelope.
        '500':
          description: Database is unhealthy
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # Health of the proxy/worker service; no `security` requirement is declared here.
  /health/worker:
    get:
      operationId: getHealthWorker
      tags: [Health]
      summary: Proxy health check
      description: Checks the health of the proxy API
      responses:
        # The success body is declared inline and includes a latency measurement.
        '200':
          description: Worker is healthy
          content:
            application/json:
              schema:
                type: object
                properties:
                  status:
                    type: string
                    enum:
                      - healthy
                      - unhealthy
                  worker:
                    type: string
                    enum:
                      - reachable
                      - unreachable
                  latency:
                    description: Response time in milliseconds
                    type: number
                  timestamp:
                    type: string
                    format: date-time
        # Failure uses the shared error envelope.
        '500':
          description: Worker is unhealthy
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # API-key validation (POST) and a status check for the validation service (GET).
  /validate:
    post:
      operationId: validateApiKey
      tags: [Authentication]
      summary: Validate API key
      description: Validates an API key and returns subscription information
      # The key under test is sent in the JSON body as `apiKey`.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ValidateRequest'
            example:
              apiKey: msk_test_demokey123456789012345678901234
      responses:
        '200':
          description: API key is valid
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ValidateResponse'
              example:
                valid: true
                userId: demo@medscrub.ai
                tier: professional
                limits:
                  requestsPerMonth: 100000
                  requestsPerSecond: 50
                  currentUsage: 42
                subscription:
                  active: true
                  tier: professional
        # The 400/401 samples carry a `valid` flag on top of the ErrorResponse
        # fields; ErrorResponse does not forbid additional properties.
        '400':
          description: Invalid request data
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
              example:
                valid: false
                error: Invalid request data
                details: API key is required
        '401':
          description: Invalid API key
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
              example:
                valid: false
                error: Invalid API key
        # Quota exhaustion has its own inline body reporting limit and usage.
        '429':
          description: Rate limit exceeded
          content:
            application/json:
              schema:
                type: object
                properties:
                  valid:
                    type: boolean
                  error:
                    type: string
                  limit:
                    type: number
                  used:
                    type: number
              example:
                valid: false
                error: Monthly request limit exceeded
                limit: 100000
                used: 100042
    get:
      operationId: getValidateHealth
      tags: [Authentication]
      summary: Validate endpoint health
      description: Returns validation service health status
      responses:
        '200':
          description: Validation service is healthy
          content:
            application/json:
              schema:
                type: object
                properties:
                  service:
                    type: string
                  status:
                    type: string
                  timestamp:
                    type: string
                    format: date-time

  # Core de-identification operation: takes free text, returns tokenised text
  # plus the list of detected entities.
  /deidentify-hybrid:
    post:
      tags:
        - De-identification
      summary: De-identify text containing PHI
      description: |
        De-identifies text by detecting and replacing PHI with tokens.
        Supports all 18 HIPAA Safe Harbor identifiers.
      operationId: deidentifyText
      # The list entries are alternatives: a bearer JWT or an API key header.
      security:
        - bearerAuth: []
        - apiKeyAuth: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/DeidentifyRequest'
            example:
              text: 'Patient John Doe (MRN: 123456) visited on 2024-01-15. Contact: john.doe@email.com, (555) 123-4567'
              sessionId: 'session_abc123'
      responses:
        '200':
          description: Text successfully de-identified
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DeidentifyResponse'
              # The sample is abbreviated: the text shows five tokens, but only
              # the first two entities are listed.
              example:
                deidentifiedText: 'Patient [NAME_1] (MRN: [MRN_1]) visited on [DATE_1]. Contact: [EMAIL_1], [PHONE_1]'
                sessionId: 'session_abc123'
                detectedEntities:
                  - type: 'name'
                    original: 'John Doe'
                    token: '[NAME_1]'
                    startIndex: 8
                    endIndex: 16
                    confidence: 0.95
                  - type: 'mrn'
                    original: '123456'
                    token: '[MRN_1]'
                    startIndex: 23
                    endIndex: 29
                    confidence: 0.99
                processingTime: 42
                confidence: 0.94
        '400':
          description: Invalid request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        # The API overview states that all authenticated endpoints are rate
        # limited, so the throttled outcome is declared here as well.
        # NOTE(review): the body shape is not specified in this document;
        # add a schema once confirmed.
        '429':
          description: Rate limit exceeded
        '500':
          description: Server error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # Public waitlist signup; no `security` requirement is declared here.
  /waitlist:
    post:
      operationId: joinWaitlist
      tags: [Waitlist]
      summary: Join waitlist
      description: Adds an email address to the waitlist
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/WaitlistRequest'
            example:
              email: user@example.com
              source: landing
              metadata:
                referrer: twitter
      responses:
        '200':
          description: Successfully added to waitlist
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/WaitlistResponse'
              example:
                success: true
                message: Successfully added to waitlist
                position: 42
                email: user@example.com
        # Two named samples: a malformed address and a duplicate signup.
        # Both add fields (`success`, `position`) beyond those ErrorResponse names.
        '400':
          description: Invalid request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
              examples:
                invalidEmail:
                  value:
                    success: false
                    error: Invalid email address
                alreadyExists:
                  value:
                    success: false
                    error: Email already on waitlist
                    position: 42
        '500':
          description: Server error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # Issues a JWT to a caller that presents the session cookie (`cookieAuth`).
  /jwt:
    get:
      operationId: getJwtToken
      tags: [Authentication]
      summary: Get JWT token
      description: |
        Returns a JWT token for authenticated users.
        Requires NextAuth.js session.
      security:
        - cookieAuth: []
      responses:
        '200':
          description: JWT token generated successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/JwtResponse'
              # The token value in the sample is truncated.
              example:
                token: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...'
                expiresAt: '2024-10-20T12:00:00Z'
                userId: user123
                email: user@example.com
        '401':
          description: Not authenticated
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
              example:
                error: Not authenticated

  # Usage statistics for the calling user, optionally bounded by a date range.
  /user/usage:
    get:
      tags:
        - User
      summary: Get usage statistics
      description: |
        Returns usage statistics for the authenticated user.
        Requires authentication.
      operationId: getUserUsage
      # The list entries are alternatives: a bearer JWT or the session cookie.
      security:
        - bearerAuth: []
        - cookieAuth: []
      # Both bounds are optional query parameters in `date` format.
      parameters:
        - name: startDate
          in: query
          description: Start date for usage statistics (ISO 8601)
          schema:
            type: string
            format: date
          example: '2024-10-01'
        - name: endDate
          in: query
          description: End date for usage statistics (ISO 8601)
          schema:
            type: string
            format: date
          example: '2024-10-19'
      responses:
        '200':
          description: Usage statistics retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/UsageResponse'
              example:
                usage:
                  totalRequests: 1234
                  successfulRequests: 1200
                  failedRequests: 34
                  avgResponseTime: 125
                  requestsByDay:
                    - date: '2024-10-15'
                      count: 42
                    - date: '2024-10-16'
                      count: 38
                tier: 'professional'
                limits:
                  requestsPerMonth: 100000
                  requestsPerSecond: 50
                  currentUsage: 1234
        '401':
          description: Not authenticated
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        # The API overview states that all authenticated endpoints are rate
        # limited, so the throttled outcome is declared here as well.
        # NOTE(review): the body shape is not specified in this document;
        # add a schema once confirmed.
        '429':
          description: Rate limit exceeded

components:
  # Credential mechanisms that operations reference from their `security` lists.
  securitySchemes:
    # HTTP bearer authentication carrying a JWT.
    bearerAuth:
      description: JWT token obtained from /api/jwt endpoint
      type: http
      scheme: bearer
      bearerFormat: JWT

    # API key sent in the X-API-Key request header.
    apiKeyAuth:
      description: 'API key in format msk_...'
      type: apiKey
      name: X-API-Key
      in: header

    # Session cookie, modelled as an apiKey-in-cookie scheme.
    # NOTE(review): NextAuth.js by default prefixes this cookie with `__Secure-`
    # on HTTPS deployments; confirm the name used in production.
    cookieAuth:
      description: NextAuth.js session cookie
      type: apiKey
      name: next-auth.session-token
      in: cookie

  schemas:
    # Body referenced by the 200 response of GET /health; all fields required.
    HealthResponse:
      type: object
      required: [status, timestamp, service]
      properties:
        status:
          description: Health status of the service
          type: string
          enum:
            - healthy
            - degraded
            - unhealthy
        timestamp:
          description: Timestamp of health check
          type: string
          format: date-time
        service:
          description: Service name
          type: string

    # Shared error envelope referenced by the error responses; only `error`
    # is mandatory. Additional properties are not restricted, and several
    # response examples in this document add fields such as `valid` or `success`.
    ErrorResponse:
      type: object
      required: [error]
      properties:
        error:
          description: Error message
          type: string
        details:
          description: Additional error details
          type: string
        requestId:
          description: Unique request identifier for debugging
          type: string

    # Request body for POST /validate: a single required `apiKey` string.
    ValidateRequest:
      type: object
      required:
        - apiKey
      properties:
        apiKey:
          type: string
          # Accepts 36-40 characters from [A-Za-z0-9_] after the `msk_` prefix.
          # A stricter `[a-zA-Z0-9]{40}` pattern rejects the documented sample
          # key, which has 36 characters after the prefix and an underscore.
          # This form still accepts every 40-character alphanumeric key.
          # NOTE(review): confirm the canonical key format with the issuing
          # service and tighten the pattern if possible.
          pattern: '^msk_[a-zA-Z0-9_]{36,40}$'
          description: API key to validate
          example: 'msk_test_demokey123456789012345678901234'

    # Success body for POST /validate; only `valid` is guaranteed to be present.
    ValidateResponse:
      type: object
      required: [valid]
      properties:
        valid:
          description: Whether the API key is valid
          type: boolean
        userId:
          description: User ID associated with the API key
          type: string
        tier:
          description: Subscription tier
          type: string
          enum:
            - starter
            - professional
            - enterprise
        # Quota figures for the key's owner.
        limits:
          type: object
          properties:
            requestsPerMonth:
              description: Monthly request limit (-1 for unlimited)
              type: number
            requestsPerSecond:
              description: Rate limit in requests per second
              type: number
            currentUsage:
              description: Current monthly usage
              type: number
        # Subscription state; declares its own `tier` with the same enum as above.
        subscription:
          type: object
          properties:
            active:
              description: Whether subscription is active
              type: boolean
            tier:
              description: Subscription tier
              type: string
              enum:
                - starter
                - professional
                - enterprise

    # Request body for POST /deidentify-hybrid; only `text` is required.
    DeidentifyRequest:
      type: object
      required: [text]
      properties:
        text:
          description: Text containing PHI to de-identify
          type: string
          example: 'Patient John Doe visited on 2024-01-15'
        sessionId:
          description: Optional session ID for re-identification
          type: string
          example: session_abc123
        # Optional tuning knobs for detection.
        options:
          type: object
          properties:
            confidenceThreshold:
              description: Minimum confidence threshold for entity detection
              type: number
              default: 0.7
              minimum: 0
              maximum: 1

    # Success body for POST /deidentify-hybrid; every field is required.
    DeidentifyResponse:
      type: object
      required: [deidentifiedText, sessionId, detectedEntities, processingTime, confidence]
      properties:
        deidentifiedText:
          description: De-identified text with tokens
          type: string
        sessionId:
          description: Session ID for re-identification
          type: string
        # One DetectedEntity item per entry.
        detectedEntities:
          type: array
          items:
            $ref: '#/components/schemas/DetectedEntity'
        processingTime:
          description: Processing time in milliseconds
          type: number
        confidence:
          description: Overall confidence score
          type: number
          minimum: 0
          maximum: 1

    # One detected PHI span: its category, original value, replacement token,
    # position, and detection confidence. All fields are required.
    DetectedEntity:
      type: object
      required: [type, original, token, startIndex, endIndex, confidence]
      properties:
        # NOTE(review): 15 categories are enumerated here, while the
        # /deidentify-hybrid description mentions 18 identifiers; confirm.
        type:
          description: Type of PHI detected (HIPAA Safe Harbor identifiers)
          type: string
          enum:
            - name
            - date
            - phone
            - ssn
            - email
            - address
            - mrn
            - age_over_89
            - zip_code
            - device_id
            - url
            - ip_address
            - biometric
            - photo
            - account_number
        original:
          description: Original PHI value
          type: string
        token:
          description: Replacement token
          type: string
        # NOTE(review): the indices are declared as `number`; `integer` may be
        # intended. Changing it would alter generated client types, so confirm first.
        startIndex:
          description: Start position in original text
          type: number
        endIndex:
          description: End position in original text
          type: number
        confidence:
          description: Confidence score for detection
          type: number
          minimum: 0
          maximum: 1

    # Request body for POST /waitlist; only `email` is required.
    WaitlistRequest:
      type: object
      required: [email]
      properties:
        email:
          description: Email address to add to waitlist
          type: string
          format: email
        source:
          description: Source of waitlist signup
          type: string
          default: landing
        # Free-form object; any additional properties are accepted.
        metadata:
          description: Additional metadata
          type: object
          additionalProperties: true

    # Success body for POST /waitlist; `success` and `message` are required.
    WaitlistResponse:
      type: object
      required: [success, message]
      properties:
        success:
          description: Whether the operation was successful
          type: boolean
        message:
          description: Success or error message
          type: string
        position:
          description: Position in waitlist
          type: number
        email:
          description: Email address added
          type: string
          format: email

    # Success body for GET /jwt; `token` and `expiresAt` are required.
    JwtResponse:
      type: object
      required: [token, expiresAt]
      properties:
        token:
          description: JWT token
          type: string
        expiresAt:
          description: Token expiration time
          type: string
          format: date-time
        userId:
          description: User ID
          type: string
        email:
          description: User email
          type: string
          format: email

    # Success body for GET /user/usage; all three top-level fields are required.
    UsageResponse:
      type: object
      required: [usage, tier, limits]
      properties:
        # Aggregate counters plus a per-day breakdown.
        usage:
          type: object
          properties:
            totalRequests:
              description: Total number of requests
              type: number
            successfulRequests:
              description: Number of successful requests
              type: number
            failedRequests:
              description: Number of failed requests
              type: number
            avgResponseTime:
              description: Average response time in milliseconds
              type: number
            requestsByDay:
              type: array
              items:
                type: object
                properties:
                  date:
                    type: string
                    format: date
                  count:
                    type: number
        tier:
          description: Subscription tier
          type: string
          enum:
            - starter
            - professional
            - enterprise
        # Same three quota fields that ValidateResponse declares under `limits`.
        limits:
          type: object
          properties:
            requestsPerMonth:
              type: number
            requestsPerSecond:
              type: number
            currentUsage:
              type: number
