-- Copyright (c) 2024 Huawei Technologies Co., Ltd.
-- openUBMC is licensed under Mulan PSL v2.
-- You can use this software according to the terms and conditions of the Mulan PSL v2.
-- You may obtain a copy of Mulan PSL v2 at:
--         http://license.coscl.org.cn/MulanPSL2
-- THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
-- EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
-- MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
-- See the Mulan PSL v2 for more details.

local skynet = require "skynet"
local mc_sys_info = require 'mc.sys_info'
local client = require 'fructrl.client'
local context = require 'mc.context'
local ctx_new = context.get_context_or_default()
local c_accycle = require 'ac_cycle'
local class = require 'mc.class'
local log = require 'mc.logging'
local m_enums = require 'types.enums'

local c_pwr_restore = class()

-- Constructor: remembers the collaborators used by the power-restore task.
--   ctrlstate : object later used to send power-control events
--   fructrl   : object used to read/write the persisted FRU control properties
--   id        : system id, printed as the "[System:%s]" prefix of log messages
function c_pwr_restore:ctor(ctrlstate, fructrl, id)
    self.system_id = id
    self.ctrlstate = ctrlstate
    self.fructrl = fructrl
end

-- Description : default power-on delay mode. Rack servers wait a random time
--               of up to 2 s; blade servers are meant to be delayed per slot
--               (not implemented here).
--   system_id : system id used as the log prefix
local function hs_do_default_mode_delay(system_id)
    local RACK_SERVER = 0
    -- The real server type is not queried yet, so this is always "rack".
    local server_type = RACK_SERVER

    log:notice("[System:%s]Start delay time.", system_id)
    log:notice_printf("Start delay time.")

    if server_type ~= RACK_SERVER then
        -- Blade: delay by slot, at least 500 ms per slot (placeholder, no delay performed)
        return
    end

    -- Rack: random delay within 2 s. Reseed first so the value varies strongly.
    math.randomseed(math.floor(os.clock() * 1E11))
    local ticks = math.random(200) -- random value in 1..200
    skynet.sleep(ticks)            -- sleeps ticks * 10 ms

    log:notice("[System:%s]End delay time. (delay=%d)", system_id, ticks)
    log:notice_printf("End delay time. (delay=%d)ms", ticks * 10)
end

-- Description : "half" power-on delay mode. A random draw decides whether this
--               board takes part in the delay (an even draw means it does).
--   delay     : configured delay in seconds; a non-positive value selects the 30 s default
--   system_id : system id used as the log prefix
local function hs_do_half_mode_delay(delay, system_id)
    log:notice("[System:%s]Start half mode delay.", system_id)

    math.randomseed(math.floor(os.clock() * 1E11))
    local draw = math.random(200) -- random value in 1..200
    local is_selected = (draw % 2 == 0)

    if is_selected then
        if not (delay > 0) then
            -- Fall back to the default delay of 30 seconds
            delay = 30
        end
        skynet.sleep(delay * 100) -- sleep for `delay` seconds
    end

    log:notice("[System:%s]The server is in delay mode HalfDelay. with a delay of %.1f seconds to power on",
        system_id, delay)
end

-- Description : fixed power-on delay mode in which every board is delayed.
--   delay     : configured delay in seconds; a non-positive value selects the 30 s default
--   system_id : system id used as the log prefix
local function hs_do_all_mode_delay(delay, system_id)
    log:notice("[System:%s]Start fixed delay.", system_id)

    if not (delay > 0) then
        -- Fall back to the default delay of 30 seconds
        delay = 30
    end
    skynet.sleep(delay * 100) -- sleep for `delay` seconds

    log:notice("[System:%s]The server is in delay mode FixedDelay. with a delay of %.1f seconds to power on",
        system_id, delay)
end

-- Description : random power-on delay mode in which every board is delayed by
--               a random time of up to `delay` seconds, in 0.1 s steps.
--   delay     : configured upper bound in seconds (0.1 s resolution);
--               a non-positive value selects a fixed 30 s default delay
--   system_id : system id used as the log prefix
local function hs_do_random_mode_delay(delay, system_id)
    log:notice("[System:%s]Start random mode delay.", system_id)

    if delay > 0 then
        -- Convert the bound to whole 0.1 s steps. Rounding is required because
        -- a product such as 1.1 * 10 is not an exact integer in floating point,
        -- and math.random rejects non-integral arguments on Lua 5.3+.
        local max_steps = math.floor(delay * 10 + 0.5)
        if max_steps < 1 then
            -- A positive delay below one step still needs a valid bound.
            max_steps = 1
        end
        math.randomseed(math.floor(os.clock() * 1E11))
        local steps = math.random(max_steps) -- 1..max_steps, each step is 0.1 s
        skynet.sleep(steps * 10)             -- one step equals 10 sleep ticks
        delay = steps / 10                   -- actual delay in seconds, for the log
    else
        -- Fall back to the default delay of 30 seconds
        skynet.sleep(30 * 100)
        delay = 30
    end

    log:notice("[System:%s]The server is in delay mode RandomDelay. with a delay of %.1f seconds to power on",
        system_id, delay)
end

-- Power-on delay: runs the delay routine matching the persisted delay mode.
-- Nothing happens when the persisted mode matches none of the known policies.
--   fructrl : provides get_PowerOnDelayMode / get_PowerOnDelaySeconds and system_id
local function hs_do_pwr_on_delay(fructrl)
    -- Delay mode and delay time come from the persisted configuration
    local mode = fructrl:get_PowerOnDelayMode()
    local seconds = fructrl:get_PowerOnDelaySeconds()

    -- Checked in order; the first policy whose string form equals `mode` wins.
    local candidates = {
        -- Long delays disabled: only the short delay (random within 2 s, or per slot)
        { 'DefaultDelay', function(id) hs_do_default_mode_delay(id) end },
        -- Long delay applied to a randomly chosen half of the servers
        { 'HalfDelay', function(id) hs_do_half_mode_delay(seconds, id) end },
        -- Long delay applied to all servers
        { 'FixedDelay', function(id) hs_do_all_mode_delay(seconds, id) end },
        -- Random delay between 0.1 s and the configured time (e.g. up to 120 s)
        { 'RandomDelay', function(id) hs_do_random_mode_delay(seconds, id) end },
    }

    for _, candidate in ipairs(candidates) do
        local policy_name, run_delay = candidate[1], candidate[2]
        if mode == tostring(m_enums.DelayPowerOnPolicy[policy_name]) then
            run_delay(fructrl.system_id)
            return
        end
    end
end

-- Staggers power restore across systems: system N waits (N - 1) minutes,
-- so system 1 (or lower) does not wait at all.
--   system_id : numeric system id
local function delay_for_off_peak(system_id)
    if not (system_id <= 1) then
        log:notice("[System:%s] delay power on restore for off peak", system_id)
        local systems_ahead = system_id - 1
        skynet.sleep(systems_ahead * 100 * 60) -- 100 ticks per second, 60 s per system
    end
end

-- Blocks until fructrl reports that the power-on lock is released, polling
-- every 200 ticks (2 s). Per the original author's note, the lock has its own
-- timeout and is therefore expected to unlock eventually.
-- Always returns true.
local function wait_pwronlocked_unlock(fructrl)
    log:notice('[System:%s]wait PwrOnLocked unlock···', fructrl.system_id)
    while fructrl:get_PwrOnLocked() do
        skynet.sleep(200)
    end
    log:notice('[System:%s]Quit wait_pwronlocked_unlock already.', fructrl.system_id)
    return true
end

-- The actual power-restore task: decides from the persisted power-on strategy
-- whether the system must be powered on and, if so, runs the power-on delays,
-- waits for the power-on lock, sends the power-on event and records the cause.
--   ctrlstate : receives the CtrlPowerOn event
--   fructrl   : source of the persisted strategy/state and target of the records
function c_pwr_restore:pp_do_pwr_restore_process(ctrlstate, fructrl)
    -- Restart cause to record; stays nil when no power-on is required
    local restart_cause = nil

    -- Power-on strategy read from the persisted configuration
    local strategy = fructrl:get_PowerOnStrategy()
    local policies = m_enums.PowerRestorePolicy

    if strategy == tostring(policies.AlwaysPowerOff) then
        log:notice("[System:%s]Power restore policy.........................................always-off.",
            self.system_id)
    elseif strategy == tostring(policies.AlwaysPowerOn) then
        restart_cause = m_enums.RestartCause.ACRestoreAlwaysPowerUp
        log:notice("[System:%s]Power restore policy.........................................always-on.",
            self.system_id)
    elseif strategy == tostring(policies.LastState) then
        -- Power on only when the recorded state before the AC loss is truthy
        local state_before_loss = fructrl:get_PwrStateBeforeACLost()
        log:notice('[System:%s]pwr_state is %s', self.system_id, state_before_loss)
        if state_before_loss then
            restart_cause = m_enums.RestartCause.ACRestoreKeepPreviousState
        end
        log:notice("[System:%s]Power restore policy.........................................previous.",
            self.system_id)
    else
        -- Unknown strategy: fall back to powering on
        restart_cause = m_enums.RestartCause.ACRestoreAlwaysPowerUp
        log:error("[System:%s]Failed to obtain the power restore policy. The fru will be powered on.",
            self.system_id)
    end

    if restart_cause == nil then
        return
    end

    -- Run the configured delay first, then power on according to the policy
    hs_do_pwr_on_delay(fructrl)

    -- Multi-host models stagger power-on after AC, one minute per system
    delay_for_off_peak(self.system_id)

    -- Wait until the power-on lock is released
    wait_pwronlocked_unlock(fructrl)
    ctrlstate:send_powerctrl_event(m_enums.CtrlEventTypes.CtrlPowerOn, m_enums.FruId.OS)

    -- Record the restart cause
    fructrl:add_RestartCauseRecords(restart_cause)

    local function flag_power_up_restart()
        -- Flag the restart as InitiatedByPowerUp so the SEL log entry is produced
        fructrl:set_CurrentRestartType(m_enums.RestartInitiateId.InitiatedByPowerUp)
        skynet.sleep(200) -- wait 2 s for the sensor event, then restore the default
        fructrl:set_CurrentRestartType(m_enums.RestartInitiateId.None)
    end
    skynet.fork_once(flag_power_up_restart)
end

-- Reports how the BMC was last reset, based on mc_sys_info.get_reset_type():
--   0           : AC / whole-machine power-up -> 'hard_reset'
--   other value : soft reset                  -> 'soft_reset'
-- Returns 'soft_reset' as well when the reset type cannot be obtained.
function c_pwr_restore:get_bmc_reset_type()
    local ok, ret = mc_sys_info.get_reset_type()
    if not ok then
        log:error("get reset type failed, err:%s", tostring(ret))
        return 'soft_reset'
    end
    local reset_type = ret == 0 and 'hard_reset' or 'soft_reset'
    -- Same "[System:%s]" prefix as every other log message in this module
    log:notice('[System:%s]bmc_reset_type is %s.', self.system_id, reset_type)
    return reset_type
end

-- Returns one of the objects reported by client:GetEnvObjects() (the last one
-- visited by pairs, whose order is unspecified), or nil when there is none.
-- The failure is logged only when no object was found.
local function get_env_object()
    local objects = client:GetEnvObjects()
    local res
    for _, obj in pairs(objects) do
        res = obj
    end
    if res == nil then
        log:error('get env object failed')
    end
    return res
end

-- Calls ResetOnNextBMCReset on every secure-boot object. A failing call is
-- logged and does not stop the remaining objects from being processed.
local function notify_m3_reset()
    local function request_reset(secure_boot)
        local succeeded, err = pcall(secure_boot.ResetOnNextBMCReset, secure_boot, ctx_new)
        if succeeded then
            return
        end
        log:error('notify m3 reset failed, error: %s', err)
    end

    client:ForeachSecureBootObjects(request_reset)
end

-- Calls GracefulReset on every system-control object. A failing call is
-- logged and does not stop the remaining objects from being processed.
local function bmc_reset()
    -- Reset type 0: normal start, 1: minimal system
    local NORMAL_START = 0

    local function graceful_reset(system_control)
        local succeeded, err = pcall(system_control.GracefulReset, system_control, ctx_new, NORMAL_START)
        if succeeded then
            return
        end
        log:error('reset bmc fail, error: %s', err)
    end

    client:ForeachSystemControlObjects(graceful_reset)
end

-- Active-mode handler for 'None': nothing to activate.
-- Returns true, which lets the caller continue with power restore.
local function none(fructrl)
    local system_id = fructrl.system_id
    log:notice('[System:%s]active mode is None', system_id)
    return true
end

-- Active-mode handler for 'ResetBMC': the environment variable takes effect
-- by resetting the BMC.
-- Returns false, which makes the caller stop the current power-restore run.
local function reset(fructrl)
    local system_id = fructrl.system_id
    log:notice('[System:%s]reset bmc to active environment variable', system_id)
    bmc_reset()
    return false
end

-- Active-mode handler for 'ResetBMCAndSecureCore': the environment variable
-- takes effect by notifying the M3 to reset and then resetting the BMC.
-- Returns false, which makes the caller stop the current power-restore run.
local function reset_bmc_and_securecore(fructrl)
    local system_id = fructrl.system_id
    log:notice('[System:%s]notify m3 reset and reset bmc to active environment variable', system_id)
    notify_m3_reset()
    bmc_reset()
    return false
end

-- Active-mode handler for 'ResetBMCAndSecureCoreOnPowerDown': same as
-- notifying the M3 and resetting the BMC, but only while the host is powered
-- off (PGSignal other than 1). While it is powered on, activation is skipped.
-- Returns true when skipped (caller continues), false after requesting the reset.
local function reset_bmc_and_securecore_on_powerdown(fructrl)
    log:notice('[System:%s]notify m3 reset and reset bmc to active environment variable, start check power state',
        fructrl.system_id)

    local host_powered_on = fructrl.pg_signal:get_PGSignal() == 1
    if not host_powered_on then
        notify_m3_reset()
        bmc_reset()
        return false
    end

    log:error('[System:%s]power state is on, no need active environment variable this time', fructrl.system_id)
    return true
end

-- Active-mode handler for 'ACCycle': the environment variable takes effect
-- through an AC cycle, performed only while the host is powered off
-- (PGSignal other than 1).
-- Returns true when skipped (caller continues), false after triggering ac_down.
local function accycle(fructrl)
    local host_powered_on = fructrl.pg_signal:get_PGSignal() == 1
    if not host_powered_on then
        log:notice('[System:%s]ACCycle to active environment variable', fructrl.system_id)
        c_accycle:get_instance():ac_down()
        return false
    end

    log:error('[System:%s]power state is on, no need active environment variable this time', fructrl.system_id)
    return true
end

-- Stores an exception to the normal power-on strategy (the arguments of
-- SetPowerOnStrategyExceptions).
--   Reason          : free-form reason of the exception
--   Execute         : 'None' | 'No' | 'Yes', mapped to 0 | 1 | 2 for
--                     PowerOnStrategyExceptions
--   EffectivePeriod : validity period, e.g. 'Once'
--   Priority        : priority of the request; a smaller value overrides a
--                     stored record with a larger or equal value
-- With no stored record the request is stored as is. Otherwise the stored
-- record is replaced only when the new priority is higher or equal.
function c_pwr_restore:update_pwr_restore(Reason, Execute, EffectivePeriod, Priority)
    local execute_table = {None = 0, No = 1, Yes = 2}

    -- Fetch the previously stored record first
    local record = self.fructrl:get_PwrRestoreRecord() or {}
    if next(record) == nil then
        self.fructrl:set_PwrRestoreRecord({ Reason, Execute, EffectivePeriod, Priority })
        self.fructrl:set_PowerOnStrategyExceptions(execute_table[Execute])
        return
    end

    -- Compare priorities numerically whenever both convert to numbers:
    -- priorities may arrive as strings (e.g. '2'), where a raw comparison is
    -- lexicographic ('10' <= '2') and a string/number mix raises an error.
    local new_priority, stored_priority = tonumber(Priority), tonumber(record[4])
    local overrides
    if new_priority and stored_priority then
        overrides = new_priority <= stored_priority
    else
        -- Not numeric: keep the original raw comparison semantics
        overrides = Priority <= record[4]
    end

    -- Update every field when the incoming priority is higher or equal
    if overrides then
        record[1] = Reason
        record[2] = Execute
        record[3] = EffectivePeriod
        record[4] = Priority
        log:notice('[System:%s]Update item successfully, Reason=(%s), Execute=(%s), ' ..
            'EffectivePeriod=(%s), Priority=(%s)', self.system_id, Reason, Execute, EffectivePeriod, Priority)
        self.fructrl:set_PowerOnStrategyExceptions(execute_table[Execute])
        self.fructrl:set_PwrRestoreRecord(record)
    end
end

-- Names of the activation modes, indexed by the numeric ActiveMode value
-- (0..4) read from the environment object.
local activemode_table = {
    [0] = 'None',
    'ResetBMC',                         -- 1
    'ResetBMCAndSecureCore',            -- 2
    'ResetBMCAndSecureCoreOnPowerDown', -- 3
    'ACCycle',                          -- 4
}

-- Handler for each activation mode name. Every handler takes fructrl and
-- returns true to continue with power restore, or false to stop.
local activemode_handler = {
    None = none,
    ResetBMC = reset,
    ResetBMCAndSecureCore = reset_bmc_and_securecore,
    ResetBMCAndSecureCoreOnPowerDown = reset_bmc_and_securecore_on_powerdown,
    ACCycle = accycle,
}

-- Checks the chip environment-variable object before power restore and runs
-- the handler of its activation mode.
-- Returns true when power restore may continue (also when no environment
-- object exists), false when it must stop: either LoadCompleted stayed false
-- for 40 s, or the activation handler requested a stop.
local function check_active_env_var(self)
    local obj = get_env_object()
    if not obj then
        return true
    end

    -- Builds the log fragments from the CURRENT object state, so a message
    -- emitted after waiting does not report the state seen before the wait.
    local function describe_state()
        local load_msg = obj.LoadCompleted and 'loading is done' or 'is init'
        local change_msg = obj.IsChanged and 'has changed' or 'unchanged'
        return load_msg, change_msg
    end

    -- LoadCompleted == false means verification is still in its init state;
    -- power-on is not allowed until it completes.
    if not obj.LoadCompleted then
        local max_wait_seconds = 40
        local count = 0
        while true do
            skynet.sleep(100) -- poll once per second
            count = count + 1
            log:notice('[System:%s]wait LoadCompleted for %s s', self.system_id, count)
            if obj.LoadCompleted then
                log:notice('[System:%s]chip environment variable %s and value %s', self.system_id,
                    describe_state())
                break
            end
            if count == max_wait_seconds then
                log:notice('[System:%s]chip environment variable %s and value %s, power on is not allowed',
                    self.system_id, describe_state())
                return false
            end
        end
    else
        log:notice('[System:%s]chip environment variable %s and value %s', self.system_id, describe_state())
    end

    local mode_name = activemode_table[obj.ActiveMode]
    if mode_name == 'ResetBMC' or
        mode_name == 'ResetBMCAndSecureCore' or
        mode_name == 'ResetBMCAndSecureCoreOnPowerDown' then
        -- These modes reset the BMC; record a one-shot exception so the
        -- power-restore policy is executed on the following start.
        log:notice('[System:%s]active environment variable need special power restore', self.system_id)
        self:update_pwr_restore('ActiveEnv', 'Yes', 'Once', '2')
        -- Wait 3 s so the record is written to flash
        skynet.sleep(300)
    end

    local handler = activemode_handler[mode_name]
    if handler then return handler(self.fructrl) end
    return true
end

-- Applies a stored one-shot ('Once') power-on strategy exception, if any.
-- The stored record is cleared as soon as a 'Once' record is found.
-- Returns true when the exception decided the outcome (policy executed for
-- 'Yes', skipped for 'No'); false when the caller must decide by itself.
local function power_strategy_exceptions(self, ctrlstate, fructrl)
    local record = fructrl:get_PwrRestoreRecord() or {}
    log:notice('[System:%s]PwrRestoreRecord is, Reason=(%s), Execute=(%s), EffectivePeriod=(%s), Priority=(%s)',
        self.system_id, record[1], record[2], record[3], record[4])

    -- Only a non-empty record with a one-shot period is handled here
    if next(record) == nil or record[3] ~= 'Once' then
        return false
    end

    -- One-shot record: consume it before acting on it
    fructrl:set_PwrRestoreRecord({})
    fructrl:set_PowerOnStrategyExceptions(0)

    local execute = record[2]
    if execute == 'Yes' then
        log:notice("[System:%s]power restore policy needs to be executed", self.system_id)
        self:pp_do_pwr_restore_process(ctrlstate, fructrl)
        return true
    end
    if execute == 'No' then
        log:notice("[System:%s]power restore policy does not need to be executed", self.system_id)
        return true
    end
    return false
end

-- Power-restore entry point. Principle: explicit commands such as reboot or
-- power cycle take precedence; the restore policy is applied only when it is
-- unclear whether the device should be powered on.
--   self : c_pwr_restore instance (uses self.fructrl, self.ctrlstate, self.system_id)
local function pp_do_pwr_restore(self)
    -- Restart cause and restart channel start from their default values
    self.fructrl:add_RestartCauseRecords(m_enums.RestartCause.Unknown)
    self.fructrl:set_restart_channel(m_enums.ChannelId.CT_IPMB)

    -- Determine the BMC reset type (the colon call already passes self)
    local reset_type = self:get_bmc_reset_type()
    if reset_type == 'hard_reset' then
        -- The environment-variable activation check only applies to hard resets
        if not check_active_env_var(self) then
            return
        end
    end

    -- A persisted reboot / power-cycle action is pending
    if self.fructrl:get_PwrCycleType() ~= 0 then
        -- A stored power-on exception must be cleared in this case as well
        self.fructrl:set_PwrRestoreRecord({})
        self.fructrl:set_PowerOnStrategyExceptions(0)
        log:notice("[System:%s]Already in power cycle, no need power restore.", self.system_id)
        return
    end

    -- A stored power-on strategy exception, if any, decides the outcome
    if power_strategy_exceptions(self, self.ctrlstate, self.fructrl) then
        return
    end

    if reset_type == 'soft_reset' then
        -- A soft reset does not execute the power-restore policy
        return
    end

    self:pp_do_pwr_restore_process(self.ctrlstate, self.fructrl)
end

-- Starts the power-restore policy as a one-shot task via skynet.fork_once.
function c_pwr_restore:init()
    local function run_power_restore()
        pp_do_pwr_restore(self)
    end
    skynet.fork_once(run_power_restore)
end

return c_pwr_restore
