# syntax=docker/dockerfile:1
FROM swr.cn-north-4.myhuaweicloud.com/openubmc/ubuntu:24.04.2
# Name of the SDK zip inside the extra "sdk" build context (see the secret-mounted RUN below).
ARG SDK_ARCHIVE

WORKDIR /home/workspace/tool

# Locale and licensing environment, set up front to avoid interactive prompts.
# DEBIAN_FRONTEND is a build-time-only knob: declare it as ARG (still exported
# to every RUN in this stage) instead of ENV so it does not leak into the
# runtime environment of containers made from this image.
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
ENV AUTO_ACCEPT_LICENSE=true

COPY init.py ./
COPY .bmcgo ./.bmcgo
COPY build ./build

# Point apt at the Huawei Cloud mirror, then refresh the index and install the
# base toolchain in the SAME layer so index and install can never go stale
# relative to each other. ca-certificates is listed explicitly: a later layer
# downloads over HTTPS without --no-check-certificate and
# --no-install-recommends would otherwise drop it.
# NOTE: /var/lib/apt/lists is intentionally NOT cleaned here — a later layer
# installs additional packages from the same index.
RUN sed -i 's/archive.ubuntu/mirrors.huaweicloud/g' /etc/apt/sources.list.d/ubuntu.sources && \
    sed -i 's/security.ubuntu/mirrors.huaweicloud/g' /etc/apt/sources.list.d/ubuntu.sources && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        locales \
        make \
        python3 \
        sudo \
        wget

# Generate the en_US.UTF-8 locale and make it the system-wide default.
RUN locale-gen en_US.UTF-8 && update-locale LANG=en_US.UTF-8

# Install Git Credential Manager; the .deb is deleted in the same layer so it
# never adds to the image size.
RUN GCM_DEB=gcm-linux_amd64.2.6.1.deb && \
    wget "https://dailybuild.openubmc.cn/tools/${GCM_DEB}" && \
    dpkg -i "${GCM_DEB}" && \
    rm -f "${GCM_DEB}"

# BuildKit secret mounts plus an extra named build context ("sdk") inject the
# external SDK archive and the Conan credentials as mounts only, so neither
# ever lands in an image layer or in `docker history`.
# Build with:
#   docker build --secret id=openubmc_user,src=... --secret id=openubmc_pass,src=... \
#                --build-context sdk=<dir> --build-arg SDK_ARCHIVE=<zip name> ...
# After init.py has consumed the credentials the Conan remotes are logged out,
# and the bmc-studio license question is pre-answered via debconf.
RUN --mount=type=secret,id=openubmc_user \
    --mount=type=secret,id=openubmc_pass \
    --mount=type=bind,from=sdk,source=${SDK_ARCHIVE},target=/tmp/bmc_sdk.zip,ro \
    bash -euo pipefail -c '\
        CONAN_USER="$(cat /run/secrets/openubmc_user 2>/dev/null || true)"; \
        CONAN_PASS="$(cat /run/secrets/openubmc_pass 2>/dev/null || true)"; \
        python3 init.py -path /tmp/bmc_sdk.zip -user "${CONAN_USER}" -psw "${CONAN_PASS}"; \
        conan remote logout openubmc_sdk; \
        conan remote logout openubmc_opensource; \
        echo "bmc-studio bmc-studio/accept-license boolean true" | debconf-set-selections \
    '

# Stage the visualization backends used by the observability feature; a
# one-shot deployment script is generated further below.
WORKDIR /app

# Download and unpack the observability stack: otelcol-contrib, Zipkin,
# OpenJDK 17 (to run Zipkin), Prometheus and Grafana. apt-get update runs in
# this layer (the index from an earlier layer may be stale), and every archive
# / .deb is deleted in the same layer to keep the image small.
# NOTE(review): --no-check-certificate disables TLS verification for these
# downloads; prefer pinning checksums if supply-chain integrity matters.
RUN mkdir -p /root/workspace/observability/collector-contrib && \
    cd /root/workspace/observability/collector-contrib && \
    wget https://github.com/open-telemetry/opentelemetry-collector-releases/releases/download/v0.139.0/otelcol-contrib_0.139.0_linux_amd64.tar.gz --no-check-certificate && \
    tar -zxf otelcol-contrib_0.139.0_linux_amd64.tar.gz && \
    rm -f otelcol-contrib_0.139.0_linux_amd64.tar.gz && \
    cd /root/workspace/observability && \
    wget https://repo1.maven.org/maven2/io/zipkin/zipkin-server/3.5.1/zipkin-server-3.5.1-exec.jar --no-check-certificate && \
    wget https://mirrors.huaweicloud.com/openjdk/17.0.1/openjdk-17.0.1_linux-x64_bin.tar.gz --no-check-certificate && \
    tar -zxf openjdk-17.0.1_linux-x64_bin.tar.gz && \
    rm -f openjdk-17.0.1_linux-x64_bin.tar.gz && \
    wget https://github.com/prometheus/prometheus/releases/download/v3.7.3/prometheus-3.7.3.linux-amd64.tar.gz --no-check-certificate && \
    tar -zxf prometheus-3.7.3.linux-amd64.tar.gz && \
    rm -f prometheus-3.7.3.linux-amd64.tar.gz && \
    apt-get update && \
    apt-get install -y --no-install-recommends adduser libfontconfig1 musl expect && \
    rm -rf /var/lib/apt/lists/* && \
    wget https://dl.grafana.com/grafana-enterprise/release/12.2.1/grafana-enterprise_12.2.1_18655849634_linux_amd64.deb --no-check-certificate && \
    dpkg -i grafana-enterprise_12.2.1_18655849634_linux_amd64.deb && \
    rm -f grafana-enterprise_12.2.1_18655849634_linux_amd64.deb

# Generate otelcol-config.yaml: OTLP/HTTP in (4318, TLS), exporting metrics to
# the Prometheus exporter (49091), logs to Elasticsearch and traces to Zipkin.
RUN  cat > /root/workspace/observability/collector-contrib/otelcol-config.yaml <<'EOF'
receivers: 
  otlp:
    protocols:
      http:
        endpoint: "0.0.0.0:4318"  # OTLP over HTTP
        max_request_body_size: 67108864  # 64MB
        tls:
          cert_file: /root/workspace/observability/collector-contrib/cert/observability.otelcol.crt      # collector证书
          key_file: /root/workspace/observability/collector-contrib/cert/observability.otelcol.key.pem   # collector密钥

processors:
  # 批处理器 - 控制数据分批
  batch:
    send_batch_size: 1024
    send_batch_max_size: 2048
    timeout: 1s
    
  # 内存限制器
  memory_limiter:
    limit_mib: 512
    spike_limit_mib: 128
    check_interval: 5s

exporters:
  debug:
    verbosity: detailed
  prometheus:
    endpoint: "127.0.0.1:49091"
    namespace: "otelcol"
  elasticsearch:
    endpoint: "https://127.0.0.1:9200"
    user: "elastic"
    password: "elastic_password"  # elasticsearch初始启动过程可获取默认密码，或通过自行重置获取密码，此处为预置密码，非必要可不更改
    tls:
      ca_file: /home/elasticsearch/elasticsearch-9.2.0/config/certs/http_ca.crt
  zipkin:
    endpoint: "http://127.0.0.1:9411/api/v2/spans"
  otlp/jaeger:
    endpoint: https://127.0.0.1:44318

service:
  pipelines:
    metrics:
      receivers: [otlp]
      exporters: [debug, prometheus]
    logs:
      receivers: [otlp]
      exporters: [debug, elasticsearch]
    traces:
      receivers: [otlp]
      exporters: [debug, zipkin]
EOF

# Provision Grafana with preset Zipkin, Prometheus and Elasticsearch
# datasources (ports must match the services started by observability.sh).
RUN  cat > /usr/share/grafana/conf/provisioning/datasources/config.yaml <<'EOF'
apiVersion: 1

datasources:
  - name: Zipkin
    type: zipkin
    url: http://localhost:9411
    access: proxy
    basicAuth: false
    readOnly: true
    isDefault: false
    jsonData:
      nodeGraph:
        enabled: true
      traceQuery:
        timeShiftEnabled: true
        spanStartTimeShift: '1h'
        spanEndTimeShift: '-1h'
      spanBar:
        type: 'None'
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://localhost:49092
    jsonData:
      httpMethod: POST
      manageAlerts: true
      allowAsRecordingRulesTarget: true
      prometheusType: Prometheus
      prometheusVersion: 3.3.0
      cacheLevel: 'High'
      disableRecordingRules: false
      timeInterval: 15s # Prometheus scrape interval
      incrementalQueryOverlapWindow: 10m
  - name: Elasticsearch
    type: elasticsearch
    access: proxy
    url: https://localhost:9200
    basicAuth: true
    basicAuthUser: "elastic"
    jsonData:
      tlsSkipVerify: true
      timeField: "@timestamp"
      sigV4Auth: false
      includeFrozen: false
      logLevelField: ""
      logMessageField: ""
      maxConcurrentShardRequests: 5
      oauthPassThru: false
      pdcInjected: false
      serverName: ""
      tlsAuth: false
      tlsAuthWithCACert: false
    secureJsonData:
      basicAuthPassword: "elastic_password"
EOF

# 创建重置Elasticsearch密码脚本
RUN cat > /root/workspace/observability/reset_elastic_password.exp <<'EOF'
#!/usr/bin/expect -f

set timeout 60
set password "elastic_password"

spawn su elasticsearch -c "/home/elasticsearch/elasticsearch-9.2.0/bin/elasticsearch-reset-password -u elastic -i"

expect "Please confirm that you would like to continue"
send "y\r"

expect "Enter password for"
send "$password\r"

expect "Re-enter password for"
send "$password\r"

expect eof
EOF

# Generate observability.sh, the start/stop controller for the backends.
RUN cat > /root/workspace/observability/observability.sh <<'EOF'
#!/bin/bash
# Manage the local observability stack (Elasticsearch, Prometheus, Zipkin,
# Grafana, otelcol-contrib). Each started service is recorded in $PID_FILE as
# one "<pid> <name>" line so that "stop" can kill them later.

PROMETHEUS_PATH=/root/workspace/observability/prometheus-3.7.3.linux-amd64/prometheus
PROMETHEUS_CONFIG_PATH=/root/workspace/observability/prometheus-3.7.3.linux-amd64/prometheus.yml

OTELCOL_PATH=/root/workspace/observability/collector-contrib/otelcol-contrib
OTELCOL_CONFIG_PATH=/root/workspace/observability/collector-contrib/otelcol-config.yaml

JAVA_PATH=/root/workspace/observability/jdk-17.0.1/bin/java
ZIPKIN_PATH=/root/workspace/observability/zipkin-server-3.5.1-exec.jar

PID_FILE=/tmp/observability_pids.txt

GRAFANA_HOME_PATH=/usr/share/grafana
GRAFANA_PATH=/usr/sbin/grafana-server

start_services() {
    echo "Starting Elasticsearch..."
    # Elasticsearch refuses to run as root, so it is launched via su. The PID
    # must be captured INSIDE the su shell: out here "$!" would be empty,
    # because the background job belongs to the inner shell, not this script.
    su - elasticsearch -c 'nohup /home/elasticsearch/elasticsearch-9.2.0/bin/elasticsearch 2>/dev/null & echo $! > /tmp/elasticsearch.pid'
    sleep 10
    ES_PID=$(cat /tmp/elasticsearch.pid 2>/dev/null)
    rm -f /tmp/elasticsearch.pid
    echo "$ES_PID elasticsearch" > "$PID_FILE"
    echo "Elasticsearch started with PID $ES_PID"

    sleep 2
    echo "Starting Prometheus..."
    nohup "$PROMETHEUS_PATH" --web.enable-otlp-receiver --web.listen-address=0.0.0.0:49092 --config.file="$PROMETHEUS_CONFIG_PATH" 2>/dev/null &
    echo "$! prometheus" >> "$PID_FILE"
    echo "Prometheus started with PID $!"

    sleep 2
    echo "Starting Zipkin..."
    nohup "$JAVA_PATH" -jar "$ZIPKIN_PATH" 2>/dev/null &
    echo "$! zipkin" >> "$PID_FILE"
    echo "Zipkin started with PID $!"

    sleep 2
    echo "Starting Grafana..."
    nohup "$GRAFANA_PATH" --homepath "$GRAFANA_HOME_PATH" 2>/dev/null &
    echo "$! grafana" >> "$PID_FILE"
    echo "Grafana started with PID $!"

    sleep 2
    echo "Starting OpenTelemetry Collector..."
    nohup "$OTELCOL_PATH" --config "$OTELCOL_CONFIG_PATH" 2>/dev/null &
    echo "$! otelcol" >> "$PID_FILE"
    echo "otelcol started with PID $!"

    sleep 2
    echo "Resetting Elasticsearch default password..."
    expect /root/workspace/observability/reset_elastic_password.exp

    echo "All observability services have been started."
}

stop_services() {
    if [ ! -f "$PID_FILE" ]; then
        echo "PID file not found. No services to stop."
        return
    fi

    echo "Stopping observability services..."
    while read -r pid service; do
        if kill -0 "$pid" 2>/dev/null; then
            kill "$pid"
            echo "Stopped $service (PID: $pid)"
        else
            echo "$service (PID: $pid) was not running"
        fi
    done < "$PID_FILE"

    rm -f "$PID_FILE"
    echo "All observability services have been stopped."
}

case "$1" in
    start)
        start_services
        ;;
    stop)
        stop_services
        ;;
    restart)
        stop_services
        sleep 2
        start_services
        ;;
    *)
        echo "Usage: $0 {start|stop|restart}"
        exit 1
        ;;
esac
EOF

# Elasticsearch refuses to run as root, so create a dedicated account and
# download/unpack it as that user. Use the explicit home path instead of
# `cd ~`: after USER, $HOME is not guaranteed to match the passwd entry.
# The ~600 MB tarball is removed in the same layer.
RUN useradd -m -s /bin/bash elasticsearch
USER elasticsearch
RUN cd /home/elasticsearch && \
    wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-9.2.0-linux-x86_64.tar.gz --no-check-certificate && \
    tar -zxf elasticsearch-9.2.0-linux-x86_64.tar.gz && \
    rm -f elasticsearch-9.2.0-linux-x86_64.tar.gz

# Safety net: the daemon must be able to write its config/certs at first start.
USER root
RUN chown -R elasticsearch:elasticsearch /home/elasticsearch/

# Replace prometheus.yml so Prometheus scrapes the collector's Prometheus
# exporter (127.0.0.1:49091, configured in otelcol-config.yaml).
RUN cat > /root/workspace/observability/prometheus-3.7.3.linux-amd64/prometheus.yml <<'EOF'
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
      - targets: ["127.0.0.1:49091"]  # 监听49091端口，用于接收collector发送的Metric数据
       # The label name is added as a label `label_name=<label_value>` to any timeseries scraped from this config.
        labels:
          app: "prometheus"
EOF

# Strip CR line endings and make the helper scripts executable; they are run
# manually after entering the container. (The original line ended in
# backslash-then-space, which is not a valid line continuation.)
RUN sed -i 's/\r$//' /root/workspace/observability/observability.sh && \
    sed -i 's/\r$//' /root/workspace/observability/reset_elastic_password.exp && \
    chmod +x /root/workspace/observability/observability.sh && \
    chmod +x /root/workspace/observability/reset_elastic_password.exp

# Build-time notice (Chinese): environment setup is complete; configure git
# and/or start the observability visualization backends manually inside the
# container (see the README for instructions).
RUN echo "openUBMC的环境设置已完成，请在容器内自行配置git或启动可观测特性的可视化后端。（配置方法可参考readme中说明）"

WORKDIR /home/workspace/source
# Keep the container alive so developers can `docker exec` into it.
CMD ["sleep", "infinity"]