-- Copyright (c) 2025 Huawei Technologies Co., Ltd.
-- openUBMC is licensed under Mulan PSL v2.
-- You can use this software according to the terms and conditions of the Mulan PSL v2.
-- You may obtain a copy of Mulan PSL v2 at:
--         http://license.coscl.org.cn/MulanPSL2
-- THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
-- EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
-- MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
-- See the Mulan PSL v2 for more details.

local skynet = require 'skynet'
local observability_client = require 'observability.client'
local singleton = require 'mc.singleton'
local class = require 'mc.class'
local log = require 'mc.logging'
local file_sec = require 'utils.file'
local vos = require 'utils.vos'
local mc_utils = require 'mc.utils'
local safe_close = mc_utils.safe_close_file
local m_config = require 'public.config'
local defs = require 'public.defs'

-- Service name handed to the start/stop/alive helpers for validation and logging
local FLUENT_BIT_SERVICE_NAME = 'fluent-bit'
-- Target states held in self.state; m:monitor uses them as keys of its handler table
local FLUENT_BIT_STATE_STOP = 0
local FLUENT_BIT_STATE_START = 1
local FLUENT_BIT_STATE_RESTART = 2

-- Controls whether the fluent-bit running state needs to be re-evaluated;
-- set by m:set_state_flag and consumed (reset to false) by m:monitor
local g_update_state_flag = true

-- Class table; all methods below are defined on it
local m = class()

--- Constructor: remember the database handle, build the config helper on top
-- of it and begin in the START state.
-- @param db database object, also handed to public.config
function m:ctor(db)
    self.db = db
    self.state = FLUENT_BIT_STATE_START
    self.config = m_config.new(db)
end

--- Start the background loop that supervises the fluent-bit process.
function m:init()
    -- Options table handed to skynet.fork_loop as its first argument
    local loop_options = {count = 0}
    -- Monitor the fluent-bit process state
    skynet.fork_loop(loop_options, self.monitor, self)
end

--- Supervision loop: refresh the desired state when requested, then run the
-- handler that matches the current state, forever.
-- Never returns; intended to run inside the coroutine created by m:init.
function m:monitor()
    -- Handlers are looked up once, when the loop starts
    local handlers = {
        [FLUENT_BIT_STATE_STOP] = self.fluent_bit_service_stop,
        [FLUENT_BIT_STATE_START] = self.fluent_bit_service_start,
        [FLUENT_BIT_STATE_RESTART] = self.fluent_bit_service_restart
    }

    repeat
        -- Consume the "state changed" flag before re-reading the configuration
        if g_update_state_flag then
            g_update_state_flag = false
            self:update_fluent_bit_state()
        end

        local handle_state = handlers[self.state]
        handle_state(self)

        -- Pause between iterations (200 skynet ticks)
        skynet.sleep(200)
    until false
end

--- Request that the monitor loop re-evaluate the desired fluent-bit state.
-- Only raises the module-level flag; m:monitor reads and clears it on its
-- next iteration.
function m:set_state_flag()
    g_update_state_flag = true
end

--- Validate a service name before it is used by the service helpers.
-- The name must be a non-empty string and must pass
-- file_sec.check_shell_special_character_s (a result of 0 means accepted).
-- @param service_name value to validate
-- @return boolean true when the name is acceptable; an error is logged otherwise
local function is_service_name_valid(service_name)
    local is_non_empty_string = type(service_name) == "string" and #service_name > 0
    if not is_non_empty_string then
        log:error("The parameter is invalid")
        return false
    end

    local check_result = file_sec.check_shell_special_character_s(service_name)
    if check_result ~= 0 then
        log:error("The service name is illegal")
        return false
    end

    return true
end

--- Check whether fluent-bit is running.
-- Reads the first line of the pid file, then inspects /proc/<pid>/cmdline and
-- reports whether it contains the text "fluent". The service name is only
-- validated; the process match itself is hard-coded to "fluent".
-- @param service_name string service name, must pass is_service_name_valid
-- @param pid_path string path of the file holding the recorded pid
-- @return boolean true only when the recorded pid belongs to a process whose
--         command line contains "fluent"; false on any failure along the way
function m:is_service_alive(service_name, pid_path)
    if not is_service_name_valid(service_name) then
        log:error("The service name is invalid")
        return false
    end

    local pid_file = file_sec.open_s(pid_path, 'r')
    if pid_file == nil then
        return false
    end

    -- NOTE(review): safe_close presumably runs the callback, closes the handle
    -- and returns the callback's result — confirm against mc.utils
    local pid_line = safe_close(pid_file, function ()
        return pid_file:read("*l")
    end)

    -- The pid is interpolated into a /proc path, so accept decimal digits only
    -- (surrounding whitespace tolerated); anything else, such as "../x", is
    -- rejected instead of being turned into a path outside /proc/<pid>
    local pid = type(pid_line) == 'string' and string.match(pid_line, '^%s*(%d+)%s*$') or nil
    if pid == nil then
        return false
    end

    local cmdline_path = string.format('/proc/%s/cmdline', pid)
    local proc_cmdline = file_sec.open_s(cmdline_path, 'r')
    if proc_cmdline == nil then
        return false
    end

    local cmdline = safe_close(proc_cmdline, function()
        return proc_cmdline:read('a')
    end)

    -- A failed read must not reach string.find, which raises on a nil subject
    if type(cmdline) ~= 'string' then
        return false
    end

    -- Plain-text search: "fluent" is a literal, not a Lua pattern
    return string.find(cmdline, "fluent", 1, true) ~= nil
end

--- Start the fluent-bit service.
-- Launches fluent-bit in the background through /bin/sh, records the pid of
-- the first matching process into defs.FLUENT_BIT_PID_FILE_PATH, then asks the
-- config helper to write its data to the fifo.
-- NOTE(review): the pid is taken from the first `ps` line containing
-- "fluent-bit" right after the background launch — confirm this cannot pick
-- up another process or run before fluent-bit is visible.
-- @param service_name string service name, must pass is_service_name_valid
-- @return boolean true when every step succeeded, false otherwise
function m:start_fluent_bit_service(service_name)
    if not is_service_name_valid(service_name) then
        log:error("The service name(%s) is invalid", service_name)
        return false
    end

    local launch_cmd = '/bin/fluent-bit -c /data/trust/etc/fluent-bit.yaml -l /dev/shm/log/net_stream.log > /dev/null 2>&1 &'
    local launched, launch_ret = pcall(vos.system_s, '/bin/sh', '-c', launch_cmd)
    if not (launched and launch_ret == 0) then
        log:error("Start the service(%s) failed, error is %s", service_name, launch_ret)
        return false
    end

    -- After starting, capture the process pid into the pid file
    local pid_cmd = '/bin/ps -ef | /bin/grep fluent-bit | /bin/grep -v grep | ' ..
            '/usr/bin/awk \'NR==1{print $2}\' > ' .. defs.FLUENT_BIT_PID_FILE_PATH
    local recorded, record_ret = pcall(vos.system_s, '/bin/sh', '-c', pid_cmd)
    if not (recorded and record_ret == 0) then
        log:error("Get service(%s) pid failed, error is %s", service_name, record_ret)
        return false
    end

    if not self.config:write_data_to_fifo() then
        log:error("Write SSL key to fifo failed")
        return false
    end

    log:info("Start fluent-bit service successfully")
    return true
end

--- Stop the fluent-bit service.
-- Sends `kill -9` to the first process whose `ps -ef` line contains
-- "fluent-bit" (the grep itself excluded).
-- @param service_name string service name, must pass is_service_name_valid
-- @return boolean true when the kill pipeline ran and exited with 0
function m:stop_fluent_bit_service(service_name)
    if not is_service_name_valid(service_name) then
        log:error("The service name(%s) is invalid", service_name)
        return false
    end

    local kill_cmd = '/bin/ps -ef | /bin/grep fluent-bit | /bin/grep -v grep | ' ..
            '/usr/bin/awk \'NR==1{print $2}\' | xargs /bin/kill -9 > /dev/null 2>&1'
    local called, exit_code = pcall(vos.system_s, '/bin/sh', '-c', kill_cmd)
    if not (called and exit_code == 0) then
        log:error("Stop the service(%s) failed, error is %s", service_name, exit_code)
        return false
    end

    log:info("Stop fluent-bit service successfully")
    return true
end

--- Launch the fluent memory monitor script in the background.
-- The script is optional: when defs.MEMORY_MONITOR_SCRIPT is not accessible
-- nothing is started and the call still counts as success.
-- @return boolean true when the script was launched or is absent, false when
--         the launch command failed (previously success returned nil, which
--         was indistinguishable from failure in a truthiness check)
local function start_memory_monitor()
    if not vos.get_file_accessible(defs.MEMORY_MONITOR_SCRIPT) then
        return true
    end

    local cmd = '/bin/bash ' .. defs.MEMORY_MONITOR_SCRIPT .. ' > /dev/null 2>&1 &'
    local ok, ret = pcall(vos.system_s, '/bin/sh', '-c', cmd)
    if not ok or ret ~= 0 then
        log:error("Start fluent memory monitor failed, error is %s", ret)
        return false
    end

    return true
end

--- Kill the fluent memory monitor process.
-- Sends `kill -9` to the first process whose `ps -ef` line contains
-- "fluent_memory_monitor". Only a raised error counts as failure; the exit
-- status of the pipeline is ignored.
-- NOTE(review): presumably the exit status is ignored because kill fails
-- when no monitor is running — confirm.
-- @return boolean true when the command was issued, false when the call raised
--         (previously success returned nil, which was indistinguishable from
--         failure in a truthiness check)
local function stop_memory_monitor()
    local cmd = '/bin/ps -ef | /bin/grep fluent_memory_monitor | /bin/grep -v grep | ' ..
        '/usr/bin/awk \'NR==1{print $2}\' | xargs /bin/kill -9 > /dev/null 2>&1'
    local ok, ret = pcall(vos.system_s, '/bin/sh', '-c', cmd)
    if not ok then
        log:error("Stop fluent memory monitor failed, error is %s", ret)
        return false
    end

    return true
end

--- Handler for the START state: make sure fluent-bit is running.
-- Does nothing when the service is already alive; otherwise regenerates the
-- configuration, starts fluent-bit and then launches the memory monitor.
-- @return boolean true when the service is (now) running, false on failure
function m:fluent_bit_service_start()
    local already_running = self:is_service_alive(FLUENT_BIT_SERVICE_NAME, defs.FLUENT_BIT_PID_FILE_PATH)
    if already_running then
        return true
    end

    if not self.config:generate() then
        log:error("Generate fluent-bit config failed")
        return false
    end

    if not self:start_fluent_bit_service(FLUENT_BIT_SERVICE_NAME) then
        log:error("Start fluent-bit failed")
        return false
    end

    -- The monitor's own result is not taken into account
    start_memory_monitor()
    return true
end

--- Handler for the STOP state: make sure fluent-bit is not running.
-- Does nothing when the service is already down; otherwise kills fluent-bit
-- and then the memory monitor.
-- @return boolean true when the service is (now) stopped, false on failure
function m:fluent_bit_service_stop()
    local running = self:is_service_alive(FLUENT_BIT_SERVICE_NAME, defs.FLUENT_BIT_PID_FILE_PATH)
    if not running then
        return true
    end

    if not self:stop_fluent_bit_service(FLUENT_BIT_SERVICE_NAME) then
        log:error("Stop fluent-bit failed")
        return false
    end

    -- The monitor's own result is not taken into account
    stop_memory_monitor()
    return true
end

--- Handler for the RESTART state: stop fluent-bit, start it again and, on
-- success, fall back to the START state so the restart is not repeated.
-- @return boolean true when both steps succeeded, false otherwise (the state
--         is left unchanged on failure)
function m:fluent_bit_service_restart()
    -- Start is attempted only when stop succeeded; both failures share one message
    local restarted = self:fluent_bit_service_stop() and self:fluent_bit_service_start()
    if not restarted then
        log:error("Restart fluent-bit failed")
        return false
    end

    self.state = FLUENT_BIT_STATE_START
    return true
end

--- Mirror the desired fluent-bit state onto the dashboard observability object.
-- STOP sets Enabled to false, START sets it to true; any other state leaves
-- the object untouched.
-- @param state number one of the FLUENT_BIT_STATE_* constants
local function set_trace_report_status(state)
    local dashboard = observability_client:GetDashboardObservabilityObject()
    if not dashboard then
        log:error("Get Dashboard obj failed")
        return
    end

    local is_start = state == FLUENT_BIT_STATE_START
    if is_start or state == FLUENT_BIT_STATE_STOP then
        dashboard.Enabled = is_start
    end
end

--- Recompute self.state from the stored configuration.
-- Reads the ObservabilityService row and the receiver with ReceiverId 0:
--   * either one disabled          -> STOP, trace reporting disabled
--   * both enabled, state is STOP  -> START, trace reporting enabled
--   * both enabled, state is START -> RESTART, trace reporting enabled
--   * both enabled, state is RESTART -> unchanged
-- @return nothing useful (nil when the configuration rows cannot be read)
function m:update_fluent_bit_state()
    local db = self.db
    local service_cfg = db:select(db.ObservabilityService):first()
    local receiver_cfg = db:select(db.Receivers):where(db.Receivers.ReceiverId:eq(0)):first()
    if service_cfg == nil or receiver_cfg == nil then
        log:error("Get observability config failed")
        return nil
    end

    local reporting_enabled = service_cfg.Enabled and receiver_cfg.Enabled
    if not reporting_enabled then
        self.state = FLUENT_BIT_STATE_STOP
        set_trace_report_status(FLUENT_BIT_STATE_STOP)
        return
    end

    -- Reporting is enabled: a stopped service gets started, a started one restarted
    local next_state
    if self.state == FLUENT_BIT_STATE_STOP then
        next_state = FLUENT_BIT_STATE_START
    elseif self.state == FLUENT_BIT_STATE_START then
        next_state = FLUENT_BIT_STATE_RESTART
    end

    if next_state ~= nil then
        self.state = next_state
        set_trace_report_status(FLUENT_BIT_STATE_START)
    end
end

-- Export the class wrapped by mc.singleton
return singleton(m)