-- Copyright (c) 2024 Huawei Technologies Co., Ltd.
-- openUBMC is licensed under Mulan PSL v2.
-- You can use this software according to the terms and conditions of the Mulan PSL v2.
-- You may obtain a copy of Mulan PSL v2 at:
--         http://license.coscl.org.cn/MulanPSL2
-- THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
-- EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
-- MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
-- See the Mulan PSL v2 for more details.

local c_controller_collection = require 'controller.controller_collection'
local c_drive_collection = require 'drive.drive_collection'
local c_storageconfig = require 'storageconfig.storageconfig_object'
local c_tasks = require 'mc.orm.tasks'
local singleton = require 'mc.singleton'
local class = require 'mc.class'
local skynet = require 'skynet'
local log = require 'mc.logging'
local common_def = require 'common_def'

-- Value handed to set_timeout_ms() when the identify loop task is created.
local PD_IDENTIFY_INTERVAL <const> = 100
-- Number of identify rounds between two dumps of the pds that are still unlocated
-- (the original author's note: about once every 5 minutes).
local PD_MAX_TRY <const> = 600 * 5 -- dump the still-unlocated pds about every 5 minutes
-- Name under which the identify loop task is created.
local TASK_PD_IDENTIFY <const> = 'pd_identify'
-- Upper bound for the number of "locating drive" log lines emitted by this service.
local MAX_LOCATE_LOG_COUNT <const> = 200

-- Physical-drive (pd) identify service: keeps the bookkeeping needed to pair
-- controller-reported pds with drive objects.
local pd_identify_service = class()

-- Constructor: stores the database handle and starts with empty bookkeeping.
-- @param db  database handle used by the persistence lookups/updates of this service
function pd_identify_service:ctor(db)
    self.db = db

    -- pairing bookkeeping
    self.pd_list = {}              -- pds that are not paired with a drive yet
    self.drive_list = {}           -- drives that are not paired with a pd yet
    self.identified_list = {}      -- established { pd = ..., drive = ... } pairs
    self.controller_list = {}      -- controllers that were subscribed while init() ran

    -- identify task state
    self.locating_pd_log_count = 0 -- number of "locating drive" log lines emitted so far
    self.add_pd_complete = false   -- updated from the controllers' on_add_pd_complete signal
    self.skip = false              -- true while the identify task must skip its real work
    self.task = nil                -- handle of the running identify task, nil while idle
end

-- Reset the cached I/O diagnose data of a drive to its default values.
-- @param drive  drive object whose io_diagnose_info table is overwritten in place
local function set_io_diagnose_info_default_values(drive)
    local info = drive.io_diagnose_info

    -- identity fields
    info.drive_name = ''
    info.media_type = ''
    info.manufacturer = ''
    info.serial_number = ''
    info.protocol = common_def.INVALID_U8
    info.ref_controller_id = common_def.INVALID_U8

    -- counters
    info.glist_cnt = 0
    info.plist_cnt = 0
    info.unc_static = 0
    info.media_error = 0
    info.other_error = 0
    info.cmd_timeout = 0
    info.unexpected_sense = 0

    -- textual diagnose data
    info.bit_error_rate_zone = ''
    info.flyheight_clearance_delta_outer = ''
    info.flyheight_clearance_delta_inner = ''
    info.flyheight_clearance_delta_middle = ''
    info.cur_farm = ''
    info.factory_farm = ''
end

-- Reset the cached estimated-remaining-lifespan data of a drive to its default values.
-- @param drive  drive object whose estimated_remaining_lifespan_info table is overwritten in place
local function set_estimated_remaining_lifespan_info_default_values(drive)
    local info = drive.estimated_remaining_lifespan_info
    local invalid_u8 = common_def.INVALID_U8
    local invalid_u32 = common_def.INVALID_U32
    local invalid_str = common_def.INVALID_NUM2STR

    -- identity fields
    info.drive_name = ''
    info.media_type = ''
    info.manufacturer = ''
    info.serial_number = ''
    info.protocol = invalid_u8
    info.ref_controller_id = invalid_u8

    -- wear-related values
    info.is_support_hw_defined = 0
    info.slc_avg_ec = invalid_u32
    info.tlc_avg_ec = invalid_u32
    info.slc_poh = invalid_u32
    info.tlc_poh = invalid_u32
    info.slc_pe_cycle = invalid_u32
    info.tlc_pe_cycle = invalid_u32
    info.remn_wearout = invalid_u8
    info.power_on_hours = invalid_u32
    info.tlc_used_lifespan = invalid_str
    info.slc_used_lifespan = invalid_str
end

-- Reset the cached write-amplification data of a drive to its default values.
-- @param drive  drive object whose write_amplification_info table is overwritten in place
local function set_write_amplification_info_default_values(drive)
    local info = drive.write_amplification_info
    local invalid_dword = common_def.STORAGE_INFO_INVALID_DWORD

    info.update_support_flag = 0

    -- "hw_defined" counters (low/high halves) and their validity flag
    info.hw_defined_valid_flag = 0
    info.hw_defined_nand_write_l = invalid_dword
    info.hw_defined_nand_write_h = invalid_dword
    info.hw_defined_host_write_l = invalid_dword
    info.hw_defined_host_write_h = invalid_dword

    -- "vendor" counters and their validity flag
    info.vendor_valid_flag = 0
    info.vendor_nand_write = invalid_dword
    info.vendor_host_write = invalid_dword
end

-- Clear every cached sub-health data set of a drive: I/O diagnose data,
-- estimated remaining lifespan data and write amplification data (in that order).
-- @param drive  drive object whose cached tables are reset in place
function pd_identify_service:set_subhealth_info_default_values(drive)
    local resetters = {
        set_io_diagnose_info_default_values,
        set_estimated_remaining_lifespan_info_default_values,
        set_write_amplification_info_default_values,
    }
    for _, reset in ipairs(resetters) do
        reset(drive)
    end
end

-- Set the sub-health properties of a drive back to their default values and
-- then clear the cached sub-health data sets as well.
-- @param drive  drive object to reset
function pd_identify_service:set_subhealth_default_values(drive)
    -- ordered { property, default } pairs; applied top to bottom
    local defaults = {
        { 'EstimatedRemainingLifespan', common_def.INVALID_U32 },
        { 'EstimatedRemainingLifespanInsufficient', 0 },
        { 'IODeteriorationHealthCode', 0 },
        { 'SLCSpareBlockPercentage', common_def.INVALID_U8 },
        { 'TLCSpareBlockPercentage', common_def.INVALID_U8 },
        { 'spare_block_last_check_time', 0 },
        { 'last_record_spare_block', 0 },
    }
    for _, entry in ipairs(defaults) do
        drive[entry[1]] = entry[2]
    end

    self:set_subhealth_info_default_values(drive)
end

-- Hook a drive object into the service.
-- A drive whose `presence` already equals 1 is queued right away; afterwards the
-- drive's on_presence_changed signal drives insertion and removal handling.
-- @param drive  drive object to track
function pd_identify_service:on_add_drive_call_back(drive)
    -- drive became (or already is) present: queue it for identification
    local function handle_inserted()
        log:notice('Disk%s add', drive.Id)
        self:add_drive(drive)
    end

    -- drive was pulled: reset its per-disk state, then drop it from the service
    local function handle_removed()
        log:notice('Disk%s del', drive.Id)
        drive.Missing = 0
        drive.SerialNumber = common_def.INVALID_STRING
        drive.CommandTimeoutTimes = 0
        drive.UnexpectedSenseTimes = 0
        drive.FirmwareStatusError = false
        self:set_subhealth_default_values(drive)
        self:del_drive(drive)
    end

    if drive.presence == 1 then
        handle_inserted()
    end

    drive.on_presence_changed:on(function(is_presence)
        if not is_presence then
            handle_removed()
            return
        end
        handle_inserted()
    end)
end

-- Subscribe `service` to the pd related signals of one controller:
--   on_add_pd          -> service:add_pd(pd)
--   on_add_pd_complete -> stored in service.add_pd_complete
--   on_del_pd          -> service:del_pd(pd)
-- Private helper shared by both subscription paths of init().
local function subscribe_controller_pd_signals(service, controller)
    controller.on_add_pd:on(function(pd)
        log:notice('pd %s add', pd and pd.key)
        service:add_pd(pd)
    end)

    controller.on_add_pd_complete:on(function(complete)
        service.add_pd_complete = complete
    end)

    controller.on_del_pd:on(function(pd)
        log:notice('pd %s del', pd and pd.key)
        service:del_pd(pd)
    end)
end

-- Initialise the identify service:
-- 1. subscribe to the add/delete pd signals of every controller
-- 2. subscribe to the presence-changed signal of every drive
function pd_identify_service:init()
    local controllers = c_controller_collection.get_instance()

    -- Controllers that already exist are subscribed here and remembered by Id, so
    -- that an on_add_controller signal for the same Id is ignored below.
    for _, controller in pairs(controllers.controller_list) do
        self.controller_list[controller.Id] = controller
        subscribe_controller_pd_signals(self, controller)
    end

    -- Controllers announced later are subscribed when the signal arrives.
    -- (They are not recorded in controller_list, same as before this refactoring.)
    controllers.on_add_controller:on(function(controller)
        if self.controller_list[controller.Id] then
            return
        end
        subscribe_controller_pd_signals(self, controller)
    end)

    -- While the OS is powered off the pd_identify task skips its real work.
    controllers.on_os_state_changed:on(function(os_state)
        self.skip = not os_state
    end)

    -- Drives that already exist, followed by drives that are added later.
    local drives = c_drive_collection.get_instance().get_all_drives()
    for _, v in pairs(drives) do
        self:on_add_drive_call_back(v)
    end

    c_drive_collection.get_instance().on_add_drive:on(function(drive)
        self:on_add_drive_call_back(drive)
    end)
end

-- Locate the identified (paired) entry that belongs to a pd, comparing by `key`.
-- @param pd  pd object to look for
-- @return index into identified_list, or nothing when the pd is not paired
function pd_identify_service:find_identified_by_pd(pd)
    for idx, pair in ipairs(self.identified_list) do
        if pair.pd.key == pd.key then
            return idx
        end
    end
end

-- Locate the identified (paired) entry that belongs to a drive, comparing by `Name`.
-- @param drive  drive object to look for
-- @return index into identified_list, or nothing when the drive is not paired
function pd_identify_service:find_identified_by_drive(drive)
    local wanted = drive.Name
    for idx, pair in ipairs(self.identified_list) do
        if pair.drive.Name == wanted then
            return idx
        end
    end
end

-- Locate a pd inside the unpaired pd_list, comparing by `key`.
-- @param pd  pd object to look for
-- @return index into pd_list, or nothing when the pd is not queued
function pd_identify_service:find_pd_list(pd)
    for idx, candidate in ipairs(self.pd_list) do
        if candidate.key == pd.key then
            return idx
        end
    end
end

-- Locate a drive inside the unpaired drive_list, comparing by `Name`.
-- @param drive  drive object to look for
-- @return index into drive_list, or nothing when the drive is not queued
function pd_identify_service:find_drive_list(drive)
    local wanted = drive.Name
    for idx, candidate in ipairs(self.drive_list) do
        if candidate.Name == wanted then
            return idx
        end
    end
end

-- Register a new pd: append it to the unpaired pd_list unless it is already
-- queued there or already paired with a drive.
-- @param pd  pd object reported by a controller; nil is ignored
function pd_identify_service:add_pd(pd)
    -- The signal handlers log `pd and pd.key`, i.e. a nil pd is anticipated.
    -- Without this guard a nil pd raises inside the lookups below as soon as
    -- one of the lists is non-empty.
    if not pd then
        return
    end

    if self:find_pd_list(pd) or self:find_identified_by_pd(pd) then
        return
    end

    self.pd_list[#self.pd_list + 1] = pd
end

-- Remove a pd from the service.
-- 1. Drop it from the unpaired pd_list when it is still waiting there.
-- 2. When it was already paired, dissolve the pair and pass the drive to
--    add_drive() so that it is queued for identification again.
-- @param pd  pd object reported by a controller; nil is ignored
function pd_identify_service:del_pd(pd)
    -- The signal handlers log `pd and pd.key`, i.e. a nil pd is anticipated.
    -- Without this guard a nil pd raises inside the lookups below as soon as
    -- one of the lists is non-empty.
    if not pd then
        return
    end

    local idx = self:find_pd_list(pd)
    if idx then
        table.remove(self.pd_list, idx)
    end

    local identified_idx = self:find_identified_by_pd(pd)
    if identified_idx then
        local identified = self.identified_list[identified_idx]
        -- remove the pair first so that add_drive() no longer sees the drive as paired
        table.remove(self.identified_list, identified_idx)
        self:add_drive(identified.drive)
    end
end

-- Register a new drive: append it to the unpaired drive_list and make sure the
-- identify task is running. Drives that are already queued or paired are ignored.
-- @param drive  drive object to queue
function pd_identify_service:add_drive(drive)
    local already_known = self:find_drive_list(drive) or self:find_identified_by_drive(drive)
    if not already_known then
        table.insert(self.drive_list, drive)
        self:start_identify_task(drive)
    end
end

-- Remove a drive from the service.
-- 1. Drop it from the unpaired drive_list when it is still waiting there.
-- 2. When it was already paired: undo the identification on the drive, delete the
--    paired pd from the owning controller's pd_list and remove the pair.
-- @param drive  drive object that went away
function pd_identify_service:del_drive(drive)
    local idx = self:find_drive_list(drive)
    if idx then
        table.remove(self.drive_list, idx)
    end

    local identified_idx = self:find_identified_by_drive(drive)
    if not identified_idx then
        return
    end

    local identified = self.identified_list[identified_idx]
    -- Resolve the controller before the identification is undone.
    local controller_id = identified.drive.RefControllerId
    local controller = c_controller_collection.get_instance():get_by_controller_id(controller_id)

    -- undo the identification
    identified.drive:identified(nil)

    -- delete the pd: from the unpaired list (if present) and from the controller
    local pd_idx = self:find_pd_list(identified.pd)
    if pd_idx then
        table.remove(self.pd_list, pd_idx)
    end
    if controller and controller.pd_list then
        controller.pd_list[identified.pd.key] = nil
    else
        -- A missing controller used to raise here, which skipped the removal below
        -- and left a stale pair in identified_list.
        log:notice('Disk%s del, controller %s not found, skip pd %s cleanup',
            drive.Id, controller_id, identified.pd.key)
    end

    -- remove the pair from the mapping table
    table.remove(self.identified_list, identified_idx)
end

-- Fold over the unpaired drive_list.
-- @param cb   called as cb(acc, drive, index); returns the new accumulator and an
--             optional second value that, when truthy, ends the traversal early
-- @param acc  initial accumulator value
-- @return the accumulator after the last callback invocation
function pd_identify_service:fold_drive_list(cb, acc)
    for position, item in ipairs(self.drive_list) do
        local stop
        acc, stop = cb(acc, item, position)
        if stop then
            return acc
        end
    end
    return acc
end

-- Count the unpaired drives that currently report is_being_located().
-- While the log budget (MAX_LOCATE_LOG_COUNT) is not used up, the final count and
-- the Ids of those drives are logged once per call.
-- @return number of drives in drive_list that are being located
function pd_identify_service:get_being_located_cnt()
    local locating_drives = {}
    local total = self:fold_drive_list(function(count, drive)
        if drive:is_being_located() then
            table.insert(locating_drives, drive.Id)
            count = count + 1
        end
        return count
    end, 0)

    -- Log after the fold: logging inside the callback printed partial results and
    -- spent one unit of the log budget per drive instead of one per call.
    if self.locating_pd_log_count < MAX_LOCATE_LOG_COUNT then
        log:notice('locating drive count is %s, locating drives : %s', total, table.concat(locating_drives, ', '))
        self.locating_pd_log_count = self.locating_pd_log_count + 1
    end
    return total
end

-- Decide whether a {pd, drive} mapping may be established.
-- @param pd     pd whose locate operation is in progress
-- @param drive  candidate drive
-- @return true when the mapping is allowed, false otherwise
function pd_identify_service:map_allowed(pd, drive)
    if not drive:is_presence() then
        return false
    end

    -- 1. The drive has to report "being located" in 5 consecutive checks, 200 ms apart.
    local checks_left = 5
    while checks_left > 0 do
        c_tasks.get_instance().sleep_ms(200)
        -- Re-validate after every sleep: drive or pd may have been hot-plugged meanwhile.
        local still_ok = drive:is_presence() and pd:is_valid() and drive:is_being_located()
        if not still_ok then
            return false
        end
        checks_left = checks_left - 1
    end

    -- 2. Exactly one drive may be in the "being located" state.
    return self:get_being_located_cnt() == 1
end

-- Pair a pd with a drive. Both have to be in their unpaired lists; on success they
-- are removed from those lists and recorded in identified_list.
-- @param pd     pd to pair
-- @param drive  drive to pair
-- @return true when the pair was recorded, false when either side is not queued
function pd_identify_service:map_pd_to_drive(pd, drive)
    local drive_pos = self:find_drive_list(drive)
    local pd_pos = self:find_pd_list(pd)
    if not (drive_pos and pd_pos) then
        return false
    end

    drive:identified(pd)
    table.remove(self.drive_list, drive_pos)
    table.remove(self.pd_list, pd_pos)
    table.insert(self.identified_list, { pd = pd, drive = drive })
    return true
end

-- Try to establish the {pd, drive} pair: the mapping is recorded only when
-- map_allowed() approves it.
-- @return true when the pair was recorded, false otherwise
function pd_identify_service:locate_pd(pd, drive)
    return self:map_allowed(pd, drive) and self:map_pd_to_drive(pd, drive)
end

-- Create a shallow copy of the unpaired drive_list (same drive objects, new table).
-- @return new array holding the current content of drive_list
function pd_identify_service:clone_drive_list()
    local source = self.drive_list
    return table.move(source, 1, #source, 1, {})
end

-- Try to pair a pd by means of the persisted Drive record that matches the pd's
-- enclosure id, slot number and controller id.
-- @param pd  pd to identify
-- @return result of map_pd_to_drive() when a queued drive matches the record,
--         nothing when no record exists or the recorded drive is not queued
function pd_identify_service:identify_pd_by_persistence_data(pd)
    log:notice('identify pd %s by persistence data', pd.key)

    local tbl = self.db.Drive
    local record = self.db:select(tbl):where(
        tbl.EnclosureId:eq(pd.enclosure_id),
        tbl.SlotNumber:eq(pd.slot_num),
        tbl.RefControllerId:eq(pd.controller_id)
    ):first()

    if not record or not record.Id then
        log:notice('Pd %s has no persistence data', pd.key)
        return
    end

    -- look for the recorded drive among the drives that still wait for a pd
    for _, candidate in pairs(self.drive_list) do
        if candidate.Id == record.Id then
            log:notice('identify pd %s by persistence data successfully, ref drive is Disk%s', pd.key, record.Id)
            return self:map_pd_to_drive(pd, candidate)
        end
    end
end

-- Persist the location of an identified pair: update the Drive record with the
-- drive's Id when it exists, insert a new record otherwise.
-- @param pd     pd providing enclosure_id, slot_num and controller_id
-- @param drive  drive providing the record Id
function pd_identify_service:update_identified_data(pd, drive)
    local record = self.db:select(self.db.Drive):where(self.db.Drive.Id:eq(drive.Id)):first()

    -- no usable record yet: insert one
    if not (record and record.Id) then
        log:notice('insert drive%s persistence data', drive.Id)
        log:notice('insert info is %s, %s, %s, %s', drive.Id, pd.enclosure_id, pd.slot_num, pd.controller_id)
        local row = {
            Id = drive.Id,
            EnclosureId = pd.enclosure_id,
            SlotNumber = pd.slot_num,
            RefControllerId = pd.controller_id
        }
        self.db:insert(self.db.Drive):value(row):exec()
        return
    end

    -- record exists: refresh its location fields
    log:notice('update drive%s persistence data', drive.Id)
    log:notice('update info is %s, %s, %s', pd.enclosure_id, pd.slot_num, pd.controller_id)
    record.EnclosureId = pd.enclosure_id
    record.SlotNumber = pd.slot_num
    record.RefControllerId = pd.controller_id
    record:save()
end

-- Run one identify round for the pd at the head of pd_list.
-- The pd is first matched against persisted data; otherwise its locate operation
-- is started and every unpaired drive is probed via locate_pd().
-- @return false when no drive is waiting for identification any more (the caller
--         stops the task in that case), true otherwise
function pd_identify_service:identify_task()
    if #self.drive_list == 0 then
        return false
    end

    if #self.pd_list == 0 then
        return true
    end

    -- always work on the first pd
    local pd = self.pd_list[1]

    -- prefer the persisted pairing
    if self:identify_pd_by_persistence_data(pd) then
        return #self.drive_list ~= 0
    end

    log:notice('%s locate', pd.key)
    local loc = pd:locate()

    -- The locate sequence is protected so that loc is always closed afterwards.
    local ok, err = pcall(function()
        if not loc:start() then
            log:info('locate %s failed', pd.key)
            return
        end

        -- Wait 2s: the SMC scan period is 2s, this makes sure the LocateLed value
        -- that is read afterwards has been refreshed.
        skynet.sleep(200)

        -- Iterate over a copy of drive_list: locating sleeps several times and
        -- drives or pds may be hot-plugged meanwhile; the copy keeps the array
        -- stable during the loop.
        local drive_list = self:clone_drive_list()
        for _, drive in ipairs(drive_list) do
            -- stop when the pd was removed by a hot-plug
            if not pd:is_valid() then
                break
            end

            if self:locate_pd(pd, drive) then
                self:update_identified_data(pd, drive)
                return -- identified successfully
            end
        end

        -- Not identified: move this pd to the end so that the other pds get their turn.
        if #self.pd_list > 1 and self.pd_list[1] == pd then
            table.remove(self.pd_list, 1)
            self.pd_list[#self.pd_list + 1] = pd
        end
        log:info('identify %s failed', pd.key)
    end)

    -- The pcall result used to be discarded, which hid every runtime error of the
    -- locate sequence; report it instead.
    if not ok then
        log:error('identify %s aborted: %s', pd.key, tostring(err))
    end

    if loc then
        loc:__close()
    end
    return true
end

-- Start the identify loop task. Does nothing when a task is already running or
-- no drive waits for identification.
-- @param drive  drive that triggered the start; it is the drive used for the
--               link-abnormal check inside the loop
function pd_identify_service:start_identify_task(drive)
    if self.task or #self.drive_list == 0 then
        return
    end

    local try_count = 0
    local pd_try_count = 0
    -- Log the triggering drive: the service object itself never sets PassThrough or
    -- ObjectName, so reading them from self always printed nil.
    -- NOTE(review): assumes drive objects expose ObjectName — confirm.
    log:notice('Start physical drive identify, PassThrough:%s ObjName:%s.', drive.PassThrough, drive.ObjectName)
    self.task = c_tasks.get_instance():new_task(TASK_PD_IDENTIFY):loop(function(task)
        -- Idle round: controllers not loaded yet, first pd round not complete,
        -- identification skipped, or no pd to work on.
        if not c_storageconfig.get_instance().all_ctrl_loaded or not self.add_pd_complete or self.skip or
            #self.pd_list == 0 then
            return
        end

        if not self:identify_task() then
            -- no drive left to identify, the task ends
            -- NOTE(review): the rest of this round still runs after the stop — confirm intended.
            task:stop()
            self.task = nil
        end
        try_count = try_count + 1
        pd_try_count = pd_try_count + 1
        -- every pd had its turn once: check whether an alarm has to be raised
        -- NOTE(review): only the drive that started the task is checked here — confirm intended.
        if try_count > #self.pd_list and drive.PassThrough ~= 1 then
            drive:check_link_abnormal()
            try_count = 0
        end

        -- periodically report the pds that are still not identified
        if pd_try_count > PD_MAX_TRY then
            for _, v in pairs(self.pd_list) do
                log:error('identify %s failed', v.key)
            end
            pd_try_count = 0
        end
    end):set_timeout_ms(PD_IDENTIFY_INTERVAL)
end

-- Module result: the service class passed through mc.singleton.
return singleton(pd_identify_service)
