
import numpy as np

class Task:
    """A single unit of work handed to the scheduler.

    Instances use ``__slots__``, so they carry no per-instance ``__dict__``
    and only the four attributes below can be set.

    Attributes:
        id: Identifier supplied by the caller as ``tid``.
        mi: Size of the task; a node divides it by its ``mips`` to obtain
            the execution time.
        deadline: Latest finish time at which the task still counts as
            on time.
        arrival_time: Earliest time at which the task may start.
    """

    __slots__ = ("mi", "deadline", "arrival_time", "id")

    def __init__(self, tid, mi, deadline, arrival_time):
        self.id, self.mi = tid, mi
        self.deadline, self.arrival_time = deadline, arrival_time

class Node:
    """A compute node that executes tasks strictly one after another.

    Attributes:
        id: Identifier supplied by the caller as ``nid``.
        mips: Processing speed; execution time of a task is ``mi / mips``.
        is_edge: Flag distinguishing edge nodes from the rest.
        time: Finish time of the most recently scheduled task (0.0 initially).
        energy: Accumulated energy according to the simple model below.
        busy_time: Sum of the execution times of all scheduled tasks.
    """

    def __init__(self, nid, mips, is_edge=False):
        self.id = nid
        self.mips = mips
        self.is_edge = is_edge
        # All running totals start from zero.
        self.time = self.energy = self.busy_time = 0.0

    def schedule(self, task: Task):
        """Run ``task`` on this node and update the node's running totals.

        The task starts once both the node is free and the task has
        arrived, whichever is later.

        Returns:
            A ``(finish, exec_time)`` tuple: the time at which the task
            completes and how long it ran.
        """
        duration = task.mi / self.mips
        arrival = task.arrival_time
        # Later of "node becomes free" and "task arrives".
        begin = arrival if arrival > self.time else self.time
        end = begin + duration

        # simple dynamic energy model
        self.energy += 1e-6 * self.mips * duration
        self.time = end
        self.busy_time += duration
        return end, duration

class CloudEdgeEnv:
    """
    Minimal cloud-edge scheduling environment for RL-MOTS.

    State: [mean_load, pending_ratio]; Actions: node index to assign next task.
    Reward = -(w_energy*energy + w_cost*cost) + w_qos*qos_ok

    Keys read from ``cfg``::

        seed                      optional, defaults to 0
        env.n_vms                 number of non-edge nodes
        env.vm_mips_range         (lo, hi), inclusive, for non-edge nodes
        env.n_edge_nodes          number of edge nodes
        env.edge_mips_range       (lo, hi), inclusive, for edge nodes
        env.migration_penalty     stored on the instance; not used by this class
        env.deadline_scale        multiplier applied to task deadlines
        tasks.n_tasks             number of tasks per episode
        tasks.mi_range            (lo, hi), inclusive, for task sizes
        reward_weights.energy / .cost / .qos

    Typical episode loop::

        state = env.reset()
        done = False
        while not done:
            state, reward, done, info = env.step(action)
    """

    def __init__(self, cfg):
        """Build the node pool, draw the first task set and read the weights.

        Raises:
            KeyError: if a required configuration key is missing.
        """
        self.cfg = cfg
        self.rng = np.random.RandomState(cfg.get("seed", 0))
        env_cfg = cfg["env"]

        # Non-edge nodes are drawn first, then edge nodes, so that a given
        # seed always yields the same node speeds.
        lo, hi = env_cfg["vm_mips_range"]
        n_vms = env_cfg["n_vms"]
        self.nodes = [
            Node(i, int(self.rng.randint(lo, hi + 1)), is_edge=False)
            for i in range(n_vms)
        ]
        elo, ehi = env_cfg["edge_mips_range"]
        # Edge node ids continue after the last non-edge id.
        self.nodes.extend(
            Node(n_vms + i, int(self.rng.randint(elo, ehi + 1)), is_edge=True)
            for i in range(env_cfg["n_edge_nodes"])
        )

        self.migration_penalty = env_cfg["migration_penalty"]
        self.deadline_scale = env_cfg["deadline_scale"]
        self.reset_tasks()

        w = cfg["reward_weights"]
        self.w_energy = w["energy"]
        self.w_cost = w["cost"]
        self.w_qos = w["qos"]

    def reset(self):
        """Start a fresh episode and return its initial state.

        Zeroes every node's ``time``, ``energy`` and ``busy_time`` and then
        draws a new task set. Without clearing the node clocks, tasks of a
        new episode (whose arrival times restart near zero) would queue
        behind the finish times left over from the previous episode.

        The random generator is not re-seeded, so successive episodes see
        different task sets.
        """
        for node in self.nodes:
            node.time = 0.0
            node.energy = 0.0
            node.busy_time = 0.0
        self.reset_tasks()
        return self.state()

    def reset_tasks(self):
        """Replace ``self.tasks`` with a newly drawn task list.

        Inter-arrival gaps are exponential with mean ``100 / n_tasks``;
        sizes are uniform integers in ``mi_range`` (inclusive). Each
        deadline is the arrival time plus a slack proportional to the task
        size and ``deadline_scale``.

        Only the task list is touched; node state is left as is. Use
        ``reset()`` to begin a clean episode.
        """
        n = self.cfg["tasks"]["n_tasks"]
        lo, hi = self.cfg["tasks"]["mi_range"]
        lam = n / 100.0
        self.tasks = []
        t = 0.0
        for i in range(n):
            # 1.0/lam is evaluated only inside the loop, so n == 0 (lam == 0)
            # yields an empty task list instead of a division error.
            t += self.rng.exponential(1.0 / lam)
            mi = int(self.rng.randint(lo, hi + 1))
            deadline = t + (mi / 20000.0) * self.deadline_scale * 5.0
            self.tasks.append(Task(i, mi, deadline, t))

    def state(self):
        """Return the observation ``[mean_load, pending_ratio]`` as float32.

        ``mean_load`` averages ``busy_time / time`` over all nodes (0.0 when
        there are no nodes); ``pending_ratio`` is the number of tasks still
        queued divided by the configured ``n_tasks``.
        """
        # The 1e-6 floor avoids dividing by zero for nodes that have not
        # run anything yet.
        loads = [node.busy_time / max(1e-6, node.time) for node in self.nodes]
        mean_load = float(np.mean(loads)) if loads else 0.0
        pending = len(self.tasks)
        total = max(1, self.cfg["tasks"]["n_tasks"])
        return np.array([mean_load, pending / total], dtype=np.float32)

    def step(self, action):
        """Assign the next queued task to the node selected by ``action``.

        Args:
            action: Node index; it is converted with ``int`` and wrapped
                modulo the number of nodes.

        Returns:
            ``(state, reward, done, info)``. ``info`` holds ``"finish"`` and
            ``"qos"`` for the scheduled task. If no task is queued, nothing
            is scheduled and ``(state, 0.0, True, {})`` is returned.
        """
        if not self.tasks:
            return self.state(), 0.0, True, {}
        task = self.tasks.pop(0)
        node = self.nodes[int(action) % len(self.nodes)]
        finish, exec_time = node.schedule(task)

        # proxy costs
        cost = 5e-7 * node.mips * exec_time
        # Same expression the node uses to accumulate its own energy.
        energy_delta = 1e-6 * node.mips * exec_time
        qos_ok = 1.0 if finish <= task.deadline else 0.0

        reward = -(self.w_energy * energy_delta + self.w_cost * cost) + self.w_qos * qos_ok
        done = len(self.tasks) == 0
        return self.state(), float(reward), done, {"finish": finish, "qos": qos_ok}

    @property
    def action_space(self):
        """Number of available actions, i.e. the number of nodes."""
        return len(self.nodes)
