""" Continuous Generic Active Inference Implementation. This module implements a continuous-time active inference agent using classical mathematics and generalized coordinates. The implementation follows the original formulation using Taylor series approximations for belief updating and free energy minimization. Key Components: - Generalized coordinates: x = [x, x', x'', ...] - Generative model: p(o,x) = p(o|x)p(x) - Free Energy: F = -ln p(o,x) - Belief updating: dx/dt = D x - ∂F/∂x where D is the shift operator in generalized coordinates """ import numpy as np from scipy.integrate import solve_ivp from scipy.special import factorial from dataclasses import dataclass, field from typing import Dict, List, Optional, Tuple, Union import logging from pathlib import Path import json # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @dataclass class ContinuousState: """Class to hold the current state of the continuous active inference agent. The state is represented in generalized coordinates up to order n: x = [x, x', x'', ..., x^(n)] """ # Generalized coordinates of beliefs (means and precisions) belief_means: np.ndarray # Shape: [n_states, n_orders] belief_precisions: np.ndarray # Shape: [n_states, n_orders] # Time tracking time: float = 0.0 dt: float = 0.01 # Current true state (if available) true_state: Optional[np.ndarray] = None # History for tracking and visualization history: Dict[str, List] = field(default_factory=lambda: { 'observations': [], 'actions': [], 'belief_means': [], 'belief_precisions': [], 'free_energy': [], 'time': [], 'true_state': [] }) def __post_init__(self): """Validate state after initialization.""" if self.belief_means.shape != self.belief_precisions.shape: raise ValueError("Belief means and precisions must have same shape") if not np.all(self.belief_precisions > 0): raise ValueError("Belief precisions must be positive") def update(self, observation: np.ndarray, action: np.ndarray, free_energy: float) -> None: """Update state and history with new information.""" self.history['observations'].append(observation.copy()) self.history['actions'].append(action.copy()) self.history['belief_means'].append(self.belief_means.copy()) self.history['belief_precisions'].append(self.belief_precisions.copy()) self.history['free_energy'].append(float(free_energy)) self.history['time'].append(float(self.time)) if self.true_state is not None: self.history['true_state'].append(self.true_state.copy()) self.time += self.dt class ContinuousActiveInference: """Continuous-time active inference using generalized coordinates.""" def __init__(self, n_states: int, n_obs: int, n_orders: int = 3, dt: float = 0.01, alpha: float = 1.0, sigma: float = 1.0): """Initialize the continuous active inference agent. Args: n_states: Number of state dimensions n_obs: Number of observation dimensions n_orders: Number of generalized coordinate orders dt: Integration time step alpha: Learning rate for belief updating sigma: Action selection precision """ self.n_states = n_states self.n_obs = n_obs self.n_orders = n_orders self.dt = dt self.alpha = alpha self.sigma = sigma # Initialize state self.state = ContinuousState( belief_means=np.zeros((n_states, n_orders)), belief_precisions=np.ones((n_states, n_orders)), dt=dt ) # Create shift operator matrix D for generalized coordinates self.D = self._create_shift_operator() def _create_shift_operator(self) -> np.ndarray: """Create shift operator matrix for generalized coordinates. The shift operator D maps between orders of motion: D[x, x', x'', ...] = [x', x'', x''', ...] This implementation includes factorial scaling to properly handle the relationship between different orders of motion in the Taylor series. """ D = np.zeros((self.n_orders, self.n_orders)) for i in range(self.n_orders - 1): # Add factorial scaling for proper Taylor series representation D[i, i+1] = factorial(i+1) / factorial(i) return D def _sensory_mapping(self, states: np.ndarray) -> np.ndarray: """Map states to observations (g(x) in standard notation). This implements a nonlinear sensory mapping that considers multiple orders of motion. The mapping includes cross-terms between states to capture more complex relationships. """ # Get lowest order states x = states[:, 0] # Nonlinear observation mapping obs = np.zeros(self.n_obs) # First order terms obs = x.copy() # Add nonlinear terms if we have multiple states if self.n_states > 1: for i in range(self.n_states): for j in range(i+1, self.n_states): # Add cross terms obs[i] += 0.1 * x[i] * x[j] obs[j] += 0.1 * x[i] * x[j] # Add quadratic terms obs[i] += 0.05 * x[i]**2 return obs def _flow_mapping(self, states: np.ndarray, action: np.ndarray) -> np.ndarray: """Map states and action to state flow (f(x,a) in standard notation). This implements a nonlinear dynamical system with: 1. Conservative forces (derived from a potential) 2. Dissipative forces (friction) 3. Action-dependent forcing 4. Cross-coupling between states """ flow = np.zeros_like(states) # Extract state components x = states[:, 0] # Position v = states[:, 1] # Velocity # Conservative forces (derived from quadratic potential) U = 0.5 * np.sum(x**2) # Quadratic potential F_conservative = -x # Gradient of quadratic potential # Dissipative forces (friction) F_dissipative = -0.1 * v # Action-dependent forcing F_action = action # Cross-coupling between states F_coupling = np.zeros_like(x) if self.n_states > 1: for i in range(self.n_states): for j in range(self.n_states): if i != j: F_coupling[i] += 0.05 * x[j] * v[j] # Update flow for each order flow[:, 0] = v # dx/dt = v flow[:, 1] = F_conservative + F_dissipative + F_action + F_coupling # dv/dt = F/m # Higher order flows (if any) for i in range(2, self.n_orders): flow[:, i] = np.zeros_like(x) # Higher orders evolve freely return flow def _compute_free_energy(self, obs: np.ndarray, states: np.ndarray, action: Optional[np.ndarray] = None) -> float: """Compute variational free energy. F = -ln p(o,x) = -ln p(o|x) - ln p(x) This implementation includes: 1. Accuracy term (prediction error) 2. Complexity term (KL divergence from prior) 3. Expected flow term 4. Entropy term """ # 1. Accuracy term (prediction error) pred_obs = self._sensory_mapping(states) sensory_pe = 0.5 * np.sum((obs - pred_obs)**2) # 2. Complexity term (KL divergence from prior) # Assume Gaussian prior with zero mean and unit precision complexity = 0.5 * np.sum(states[:, 0]**2) # 3. Expected flow term if action is not None: pred_flow = self._flow_mapping(states, action) # Compare predicted flow with actual generalized motion flow_pe = 0.0 for i in range(self.n_orders-1): flow_error = states[:, i+1] - pred_flow[:, i] flow_pe += 0.5 * np.sum(flow_error**2) else: flow_pe = 0.0 # 4. Entropy term (encourage exploration) # Use precision-weighted entropy entropy = -0.5 * np.sum(np.log(self.state.belief_precisions + 1e-8)) # Combine all terms free_energy = sensory_pe + complexity + flow_pe - entropy return free_energy def _update_beliefs(self, obs: np.ndarray, action: Optional[np.ndarray] = None, n_steps: int = 10) -> Tuple[np.ndarray, float]: """Update beliefs using generalized descent on free energy. dx/dt = D x - ∂F/∂x This implementation uses: 1. Gradient descent with momentum 2. Adaptive learning rates 3. Precision weighting of updates 4. Natural gradient in belief space Args: obs: Observation action: Optional action n_steps: Number of integration steps Returns: Updated beliefs and free energy """ states = self.state.belief_means.copy() velocities = np.zeros_like(states) # For momentum learning_rates = np.ones_like(states) # Adaptive learning rates beta1 = 0.9 # Momentum parameter beta2 = 0.999 # RMSprop parameter epsilon = 1e-8 # Numerical stability for step in range(n_steps): # Compute free energy and gradients F = self._compute_free_energy(obs, states, action) # Compute gradients using natural gradient # dF = ∇F = Σ^(-1) * ∂F/∂x where Σ is the precision matrix dF = np.zeros_like(states) eps = 1e-6 for i in range(self.n_states): for j in range(self.n_orders): states[i,j] += eps Fp = self._compute_free_energy(obs, states, action) states[i,j] -= 2*eps Fm = self._compute_free_energy(obs, states, action) states[i,j] += eps dF[i,j] = (Fp - Fm)/(2*eps) # Update precisions based on prediction errors pred_obs = self._sensory_mapping(states) obs_error = obs - pred_obs # Update precisions for all orders based on lowest order error for i in range(self.n_orders): self.state.belief_precisions[:, i] = 1.0 / (obs_error**2 + epsilon) # Precision weighting of gradients dF = dF / (self.state.belief_precisions + epsilon) # Generalized motion dstates = np.dot(states, self.D.T) # Update velocities (momentum) velocities = beta1 * velocities + (1 - beta1) * (dstates - self.alpha * dF) # Update learning rates (RMSprop) learning_rates = beta2 * learning_rates + (1 - beta2) * dF**2 # Compute effective step size effective_lr = self.dt / (np.sqrt(learning_rates) + epsilon) # Update states states += effective_lr * velocities self.state.belief_means = states return states, F def _compute_expected_free_energy(self, states: np.ndarray, action: np.ndarray) -> float: """Compute expected free energy for action selection. G = E_q(s')[F(s') + KL[q(s')||p(s')]] - H[q(o'|s')] This includes: 1. Expected accuracy 2. Expected complexity 3. Expected information gain 4. Goal-directed behavior """ # Predict next state flow = self._flow_mapping(states, action) next_states = states + self.dt * flow # 1. Expected accuracy pred_obs = self._sensory_mapping(next_states) # Use current observation as proxy for desired observation desired_obs = self._sensory_mapping(states) # Could be replaced with actual goals expected_accuracy = 0.5 * np.sum((pred_obs - desired_obs)**2) # 2. Expected complexity expected_complexity = 0.5 * np.sum(next_states[:, 0]**2 + np.log(2*np.pi)) # 3. Expected information gain # Approximate using entropy of predicted observations pred_var = 1.0 / (self.state.belief_precisions[:, 0] + 1e-8) information_gain = 0.5 * np.sum(np.log(2*np.pi*np.e*pred_var)) # 4. Goal-directed term # Add specific goals here if available goal_term = 0.0 # Combine terms with weights w_accuracy = 1.0 w_complexity = 0.1 w_info_gain = 0.5 w_goal = 1.0 G = (w_accuracy * expected_accuracy + w_complexity * expected_complexity - w_info_gain * information_gain + w_goal * goal_term) return G def _select_action(self, obs: np.ndarray) -> np.ndarray: """Select action by minimizing expected free energy. This implementation uses: 1. Path integral control 2. Multiple action candidates 3. Momentum-based optimization 4. Adaptive exploration """ n_candidates = 10 # Number of action candidates horizon = 5 # Planning horizon # Initialize action candidates best_action = np.zeros(self.n_states) best_value = np.inf action_std = np.sqrt(1.0 / (self.sigma + 1e-8)) # Exploration noise # Path integral control for _ in range(n_candidates): # Sample action trajectory action = np.random.normal(0, action_std, size=self.n_states) total_value = 0 # Simulate forward states = self.state.belief_means.copy() for t in range(horizon): # Compute value for this step value = self._compute_expected_free_energy(states, action) total_value += value * (0.9**t) # Discount factor # Simulate forward flow = self._flow_mapping(states, action) states += self.dt * flow # Update best action if total_value < best_value: best_value = total_value best_action = action # Add exploration noise based on uncertainty exploration_noise = np.random.normal( 0, 1.0 / (self.sigma * np.sqrt(self.state.belief_precisions[:, 0] + 1e-8)) ) best_action += exploration_noise return best_action def step(self, obs: np.ndarray) -> Tuple[np.ndarray, float]: """Take a step in the environment. Args: obs: Observation Returns: Tuple of (action, free energy) """ # Update beliefs _, free_energy = self._update_beliefs(obs) # Select action action = self._select_action(obs) # Update state self.state.update(obs, action, free_energy) return action, free_energy def save_state(self, path: Union[str, Path]) -> None: """Save current state to file.""" path = Path(path) state_dict = { 'belief_means': self.state.belief_means.tolist(), 'belief_precisions': self.state.belief_precisions.tolist(), 'time': float(self.state.time), 'dt': float(self.state.dt), 'true_state': None if self.state.true_state is None else self.state.true_state.tolist(), 'history': { 'observations': [o.tolist() for o in self.state.history['observations']], 'actions': [a.tolist() for a in self.state.history['actions']], 'belief_means': [m.tolist() for m in self.state.history['belief_means']], 'belief_precisions': [p.tolist() for p in self.state.history['belief_precisions']], 'free_energy': self.state.history['free_energy'], 'time': self.state.history['time'], 'true_state': [s.tolist() for s in self.state.history['true_state']] if self.state.history['true_state'] else [] } } with open(path, 'w') as f: json.dump(state_dict, f, indent=2) def load_state(self, path: Union[str, Path]) -> None: """Load state from file.""" path = Path(path) with open(path, 'r') as f: state_dict = json.load(f) self.state = ContinuousState( belief_means=np.array(state_dict['belief_means']), belief_precisions=np.array(state_dict['belief_precisions']), time=float(state_dict['time']), dt=float(state_dict['dt']), true_state=None if state_dict['true_state'] is None else np.array(state_dict['true_state']) ) self.state.history = { 'observations': [np.array(o) for o in state_dict['history']['observations']], 'actions': [np.array(a) for a in state_dict['history']['actions']], 'belief_means': [np.array(m) for m in state_dict['history']['belief_means']], 'belief_precisions': [np.array(p) for p in state_dict['history']['belief_precisions']], 'free_energy': state_dict['history']['free_energy'], 'time': state_dict['history']['time'], 'true_state': [np.array(s) for s in state_dict['history']['true_state']] if state_dict['history']['true_state'] else [] } def reset(self) -> None: """Reset the agent to initial state.""" self.state = ContinuousState( belief_means=np.zeros((self.n_states, self.n_orders)), belief_precisions=np.ones((self.n_states, self.n_orders)), dt=self.dt )