← Back to index

Okay, I understand. You want me to create a simulation of the off-chain AI agent (from the provided Rust code) using Python for a Kaggle notebook. This simulation will help test the agent's behavior and the Q-learning algorithm in a controlled environment before deploying it to the actual Solana blockchain.

Here's the Python code for the simulation, designed for a Kaggle notebook environment, along with explanations:

import random
import numpy as np
import pandas as pd
from IPython.display import display, clear_output
import time

# --- Simulation Parameters ---
EXPLORATION_RATE = 0.2
LEARNING_RATE = 0.7
DISCOUNT_FACTOR = 0.9
BASE_AMOUNT = 10
INITIAL_BALANCE = 1000  # Starting balance for the agent
SIMULATION_STEPS = 1000 # Number of steps in the simulation

# --- Simplified Market Environment (for Simulation) ---

class MarketEnvironment:
    def __init__(self, base_price=50, slope=2):
        self.base_price = base_price
        self.slope = slope
        self.total_supply = 0
        self.price_history = []

    def get_price(self):
        price = self.base_price + self.slope * self.total_supply
        self.price_history.append(price)
        return price

    def get_state(self):
        # Determine price trend based on recent history
        if len(self.price_history) >= 3:
            if self.price_history[-1] > self.price_history[-2] > self.price_history[-3]:
                trend = "Upward"
            elif self.price_history[-1] < self.price_history[-2] < self.price_history[-3]:
                trend = "Downward"
            else:
                trend = "Stable"
        else:
            trend = "Stable"

        # Determine supply level (simplified)
        if self.total_supply < 100:
            supply_level = "Low"
        elif self.total_supply < 500:
            supply_level = "Medium"
        else:
            supply_level = "High"

        return {"trend": trend, "supply_level": supply_level}

    def buy(self, amount):
        price = self.get_price()
        cost = price * amount
        self.total_supply += amount
        return cost

    def sell(self, amount):
        price = self.get_price()
        revenue = price * amount
        self.total_supply -= amount
        return revenue

# --- Agent (Adapted from Rust Code) ---

class Agent:
    def __init__(self, agent_id):
        self.agent_id = agent_id
        self.q_table = {}  # Q-table (state, action) -> value
        self.balance = INITIAL_BALANCE
        self.inventory = 0 # Number of tokens the agent holds
        self.last_action = None
        self.last_market_state = None

    def choose_action(self, state):
        if random.random() < EXPLORATION_RATE:
            # Explore
            action = random.choice(["buy", "sell", "hold"])
        else:
            # Exploit
            state_tuple = (state["trend"], state["supply_level"])
            q_values = self.q_table.get(state_tuple, {"buy": 0.0, "sell": 0.0, "hold": 0.0})
            action = max(q_values, key=q_values.get)
        return action

    def update_q_table(self, current_state, action, reward, new_state):
        current_state_tuple = (current_state["trend"], current_state["supply_level"])
        new_state_tuple = (new_state["trend"], new_state["supply_level"])

        if current_state_tuple not in self.q_table:
            self.q_table[current_state_tuple] = {"buy": 0.0, "sell": 0.0, "hold": 0.0}
        if new_state_tuple not in self.q_table:
            self.q_table[new_state_tuple] = {"buy": 0.0, "sell": 0.0, "hold": 0.0}

        current_q = self.q_table[current_state_tuple][action]
        max_next_q = max(self.q_table[new_state_tuple].values())

        new_q = (1 - LEARNING_RATE) * current_q + LEARNING_RATE * (
            reward + DISCOUNT_FACTOR * max_next_q
        )
        self.q_table[current_state_tuple][action] = new_q

    def calculate_reward(self, action, cost=0, revenue=0):
        if action == "buy":
            if cost > 0:
                return -cost  # Penalty for buying (spending money)
            else:
                return -50  # Penalty if buying wasn't possible (e.g., not enough balance)
        elif action == "sell":
            if revenue > 0:
                return revenue  # Reward for selling (getting money)
            else:
                return -50 # Penalty if selling wasn't possible (e.g., not enough inventory)
        elif action == "hold":
            # Reward/penalty for holding based on price change
            return 0 # Neutral reward for holding in this simplified version

        return -5 # Penalty for invalid action

# --- Simulation Loop ---

def run_simulation(agent, environment, num_steps=SIMULATION_STEPS):
    results = []
    for step in range(num_steps):
        state = environment.get_state()
        action = agent.choose_action(state)

        reward = 0
        cost = 0
        revenue = 0

        if action == "buy":
            if agent.balance >= environment.get_price() * BASE_AMOUNT:
                cost = environment.buy(BASE_AMOUNT)
                if agent.balance >= cost:
                    agent.balance -= cost
                    agent.inventory += BASE_AMOUNT
                    reward = agent.calculate_reward(action, cost=cost)
                else:
                    reward = agent.calculate_reward(action, cost=0)  # Not enough balance
            else:
                reward = agent.calculate_reward(action, cost=0)

        elif action == "sell":
            if agent.inventory >= BASE_AMOUNT:
                revenue = environment.sell(BASE_AMOUNT)
                if agent.inventory >= BASE_AMOUNT:
                    agent.balance += revenue
                    agent.inventory -= BASE_AMOUNT
                    reward = agent.calculate_reward(action, revenue=revenue)
                else:
                    reward = agent.calculate_reward(action, revenue=0)
            else:
                reward = agent.calculate_reward(action, revenue=0)

        elif action == "hold":
            reward = agent.calculate_reward(action) # No cost/revenue for hold

        new_state = environment.get_state()
        agent.update_q_table(state, action, reward, new_state)

        # Store results for analysis
        results.append(
            {
                "step": step,
                "state": state,
                "action": action,
                "reward": reward,
                "balance": agent.balance,
                "inventory": agent.inventory,
                "price": environment.get_price(),
                "total_supply": environment.total_supply
            }
        )

        # Update last state and action
        agent.last_action = action
        agent.last_market_state = state

        # Print current status every 50 steps
        if (step + 1) % 50 == 0:
            clear_output(wait=True)
            df_results = pd.DataFrame(results)
            display(df_results.tail(50))
            print(f"Step: {step+1}, Balance: {agent.balance:.2f}, Inventory: {agent.inventory}, Price: {environment.get_price():.2f}, Total Supply: {environment.total_supply}")
            time.sleep(0.1)  # Add a small delay for visualization

    return pd.DataFrame(results)

# --- Run the Simulation and Analyze Results ---

# Create an agent and environment
agent = Agent(agent_id="simulated_agent")
environment = MarketEnvironment()

# Run the simulation
df_results = run_simulation(agent, environment)

# Display the final Q-table
print("\nFinal Q-Table:")
q_table_df = pd.DataFrame.from_dict(agent.q_table, orient='index')
display(q_table_df)

# --- Further Analysis (Examples) ---
# You can add more analysis here, for example:

# 1. Plot the agent's balance over time:
# df_results.plot(x="step", y="balance", title="Agent's Balance Over Time")

# 2. Plot the price over time:
# df_results.plot(x="step", y="price", title="Market Price Over Time")

# 3. Analyze the frequency of each action:
# print("\nAction Frequency:")
# print(df_results["action"].value_counts())

Explanation of the Code:

  1. Simulation Parameters:

  2. Market Environment (Simplified):

  3. Agent:

  4. Simulation Loop:

  5. Run Simulation and Analyze:

How to Use in a Kaggle Notebook:

  1. Create a New Notebook: Go to Kaggle and create a new Python notebook.
  2. Copy and Paste: Copy the entire Python code provided above into a code cell in your notebook.
  3. Run the Code: Execute the code cell.

Output and Analysis:

Important Considerations:

By running this simulation and analyzing the results, you can gain valuable insights into your agent's behavior and make improvements before deploying it to the Solana blockchain. Remember that this is just a starting point, and you'll need to continue to refine and test your system as you move towards a production-ready implementation.