| """
 | |
| Continuous Generic Active Inference Implementation.
 | |
| 
 | |
| This module implements a continuous-time active inference agent using classical
 | |
| mathematics and generalized coordinates. The implementation follows the original
 | |
| formulation using Taylor series approximations for belief updating and free energy
 | |
| minimization.
 | |
| 
 | |
| Key Components:
 | |
|     - Generalized coordinates: x = [x, x', x'', ...]
 | |
|     - Generative model: p(o,x) = p(o|x)p(x)
 | |
|     - Free Energy: F = -ln p(o,x)
 | |
|     - Belief updating: dx/dt = D x - ∂F/∂x
 | |
|     where D is the shift operator in generalized coordinates
 | |
| """
 | |
| 
 | |
| import numpy as np
 | |
| from scipy.integrate import solve_ivp
 | |
| from scipy.special import factorial
 | |
| from dataclasses import dataclass, field
 | |
| from typing import Dict, List, Optional, Tuple, Union
 | |
| import logging
 | |
| from pathlib import Path
 | |
| import json
 | |
| 
 | |
| # Configure logging
 | |
| logging.basicConfig(level=logging.INFO)
 | |
| logger = logging.getLogger(__name__)
 | |


@dataclass
class ContinuousState:
    """Class to hold the current state of the continuous active inference agent.

    The state is represented in generalized coordinates up to order n:
    x = [x, x', x'', ..., x^(n)]
    """

    # Generalized coordinates of beliefs (means and precisions)
    belief_means: np.ndarray  # Shape: [n_states, n_orders]
    belief_precisions: np.ndarray  # Shape: [n_states, n_orders]

    # Time tracking
    time: float = 0.0
    dt: float = 0.01

    # Current true state (if available)
    true_state: Optional[np.ndarray] = None

    # History for tracking and visualization
    history: Dict[str, List] = field(default_factory=lambda: {
        'observations': [],
        'actions': [],
        'belief_means': [],
        'belief_precisions': [],
        'free_energy': [],
        'time': [],
        'true_state': []
    })

    def __post_init__(self):
        """Validate state after initialization."""
        if self.belief_means.shape != self.belief_precisions.shape:
            raise ValueError("Belief means and precisions must have same shape")
        if not np.all(self.belief_precisions > 0):
            raise ValueError("Belief precisions must be positive")

    def update(self,
               observation: np.ndarray,
               action: np.ndarray,
               free_energy: float) -> None:
        """Update state and history with new information."""
        self.history['observations'].append(observation.copy())
        self.history['actions'].append(action.copy())
        self.history['belief_means'].append(self.belief_means.copy())
        self.history['belief_precisions'].append(self.belief_precisions.copy())
        self.history['free_energy'].append(float(free_energy))
        self.history['time'].append(float(self.time))
        if self.true_state is not None:
            self.history['true_state'].append(self.true_state.copy())
        self.time += self.dt


class ContinuousActiveInference:
    """Continuous-time active inference using generalized coordinates."""

    def __init__(self,
                 n_states: int,
                 n_obs: int,
                 n_orders: int = 3,
                 dt: float = 0.01,
                 alpha: float = 1.0,
                 sigma: float = 1.0):
        """Initialize the continuous active inference agent.

        Args:
            n_states: Number of state dimensions
            n_obs: Number of observation dimensions
            n_orders: Number of generalized coordinate orders
            dt: Integration time step
            alpha: Learning rate for belief updating
            sigma: Action selection precision
        """
        self.n_states = n_states
        self.n_obs = n_obs
        self.n_orders = n_orders
        self.dt = dt
        self.alpha = alpha
        self.sigma = sigma

        # Initialize state
        self.state = ContinuousState(
            belief_means=np.zeros((n_states, n_orders)),
            belief_precisions=np.ones((n_states, n_orders)),
            dt=dt
        )

        # Create shift operator matrix D for generalized coordinates
        self.D = self._create_shift_operator()

    def _create_shift_operator(self) -> np.ndarray:
        """Create shift operator matrix for generalized coordinates.

        The shift operator D maps between orders of motion:
        D[x, x', x'', ...] = [x', x'', x''', ...]

        This implementation includes factorial scaling to properly handle
        the relationship between different orders of motion in the Taylor series.
        """
        D = np.zeros((self.n_orders, self.n_orders))
        for i in range(self.n_orders - 1):
            # Factorial scaling for the Taylor series representation;
            # factorial(i+1) / factorial(i) simplifies to i + 1
            D[i, i+1] = factorial(i+1) / factorial(i)
        return D
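
    # Worked example (sketch): for n_orders = 3 the operator above is
    #
    #     D = [[0., 1., 0.],
    #          [0., 0., 2.],
    #          [0., 0., 0.]]
    #
    # so a row of generalized coordinates [x, x', x''] maps to [x', 2x'', 0].
    # An unscaled shift operator (D[i, i+1] = 1) would instead give
    # [x', x'', 0]; the extra factors come from the factorial scaling.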

    def _sensory_mapping(self, states: np.ndarray) -> np.ndarray:
        """Map states to observations (g(x) in standard notation).

        This implements a nonlinear sensory mapping that considers multiple
        orders of motion. The mapping includes cross-terms between states
        to capture more complex relationships.

        Note: the mapping assumes n_obs == n_states, since each observation
        channel is built from the corresponding state dimension.
        """
        # Get lowest order states
        x = states[:, 0]

        # First order terms (identity mapping of the lowest order)
        obs = x.copy()

        # Add nonlinear terms if we have multiple states
        if self.n_states > 1:
            for i in range(self.n_states):
                for j in range(i+1, self.n_states):
                    # Add cross terms
                    obs[i] += 0.1 * x[i] * x[j]
                    obs[j] += 0.1 * x[i] * x[j]

                # Add quadratic terms
                obs[i] += 0.05 * x[i]**2

        return obs
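
    # Illustrative expansion (sketch): for n_states = 2 the mapping above is
    #
    #     g(x)[0] = x0 + 0.1 * x0 * x1 + 0.05 * x0**2
    #     g(x)[1] = x1 + 0.1 * x0 * x1 + 0.05 * x1**2
    #
    # i.e. an identity map perturbed by weak bilinear and quadratic terms.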

    def _flow_mapping(self, states: np.ndarray, action: np.ndarray) -> np.ndarray:
        """Map states and action to state flow (f(x,a) in standard notation).

        This implements a nonlinear dynamical system with:
        1. Conservative forces (derived from a potential)
        2. Dissipative forces (friction)
        3. Action-dependent forcing
        4. Cross-coupling between states

        Note: requires n_orders >= 2, since the flow is defined over
        position and velocity.
        """
        flow = np.zeros_like(states)

        # Extract state components
        x = states[:, 0]  # Position
        v = states[:, 1]  # Velocity

        # Conservative forces: gradient of the quadratic potential
        # U(x) = 0.5 * sum(x**2), so F = -dU/dx = -x
        F_conservative = -x

        # Dissipative forces (friction)
        F_dissipative = -0.1 * v

        # Action-dependent forcing
        F_action = action

        # Cross-coupling between states
        F_coupling = np.zeros_like(x)
        if self.n_states > 1:
            for i in range(self.n_states):
                for j in range(self.n_states):
                    if i != j:
                        F_coupling[i] += 0.05 * x[j] * v[j]

        # Update flow for each order
        flow[:, 0] = v  # dx/dt = v
        flow[:, 1] = F_conservative + F_dissipative + F_action + F_coupling  # dv/dt = F (unit mass)

        # Higher order flows stay at zero (flow was initialized with zeros)
        return flow
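
    # Illustrative form (sketch): for a single state the flow above reduces to
    # a damped, driven harmonic oscillator,
    #
    #     dx/dt = v
    #     dv/dt = -x - 0.1 * v + a
    #
    # with weak x_j * v_j cross-coupling terms added when n_states > 1.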

    def _compute_free_energy(self,
                             obs: np.ndarray,
                             states: np.ndarray,
                             action: Optional[np.ndarray] = None) -> float:
        """Compute variational free energy.

        F = -ln p(o,x) = -ln p(o|x) - ln p(x)

        This implementation includes:
        1. Accuracy term (prediction error)
        2. Complexity term (divergence from the prior)
        3. Expected flow term
        4. Entropy term
        """
        # 1. Accuracy term (prediction error, unit observation precision)
        pred_obs = self._sensory_mapping(states)
        sensory_pe = 0.5 * np.sum((obs - pred_obs)**2)

        # 2. Complexity term: quadratic penalty under a Gaussian prior
        # with zero mean and unit precision
        complexity = 0.5 * np.sum(states[:, 0]**2)

        # 3. Expected flow term
        if action is not None:
            pred_flow = self._flow_mapping(states, action)
            # Compare predicted flow with actual generalized motion
            flow_pe = 0.0
            for i in range(self.n_orders - 1):
                flow_error = states[:, i+1] - pred_flow[:, i]
                flow_pe += 0.5 * np.sum(flow_error**2)
        else:
            flow_pe = 0.0

        # 4. Entropy term (encourage exploration)
        # Use precision-weighted entropy
        entropy = -0.5 * np.sum(np.log(self.state.belief_precisions + 1e-8))

        # Combine all terms
        free_energy = sensory_pe + complexity + flow_pe - entropy

        return free_energy
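
    # Assembled objective (sketch, additive constants dropped): with unit
    # observation and prior precisions, the quantity above is
    #
    #     F = 0.5 * ||o - g(x)||^2                 (accuracy)
    #       + 0.5 * ||x||^2                        (complexity)
    #       + 0.5 * sum_i ||x^(i+1) - f(x)_i||^2   (flow)
    #       + 0.5 * sum(log(pi))                   (negative belief entropy)
    #
    # where pi are the belief precisions.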

    def _update_beliefs(self,
                        obs: np.ndarray,
                        action: Optional[np.ndarray] = None,
                        n_steps: int = 10) -> Tuple[np.ndarray, float]:
        """Update beliefs using generalized descent on free energy.

        dx/dt = D x - ∂F/∂x

        This implementation uses:
        1. Gradient descent with momentum
        2. Adaptive learning rates (RMSprop-style)
        3. Precision weighting of updates
        4. Finite-difference gradients of the free energy

        Args:
            obs: Observation
            action: Optional action
            n_steps: Number of integration steps

        Returns:
            Updated beliefs and free energy
        """
        states = self.state.belief_means.copy()
        velocities = np.zeros_like(states)  # For momentum
        grad_sq_avg = np.ones_like(states)  # Running average of squared gradients

        beta1 = 0.9  # Momentum parameter
        beta2 = 0.999  # RMSprop parameter
        epsilon = 1e-8  # Numerical stability

        for step in range(n_steps):
            # Compute free-energy gradients by central finite differences
            dF = np.zeros_like(states)
            eps = 1e-6
            for i in range(self.n_states):
                for j in range(self.n_orders):
                    states[i, j] += eps
                    Fp = self._compute_free_energy(obs, states, action)
                    states[i, j] -= 2*eps
                    Fm = self._compute_free_energy(obs, states, action)
                    states[i, j] += eps
                    dF[i, j] = (Fp - Fm) / (2*eps)

            # Update precisions based on prediction errors
            # (assumes n_obs == n_states; see _sensory_mapping)
            pred_obs = self._sensory_mapping(states)
            obs_error = obs - pred_obs
            # Update precisions for all orders based on lowest order error
            for i in range(self.n_orders):
                self.state.belief_precisions[:, i] = 1.0 / (obs_error**2 + epsilon)

            # Precision weighting of gradients: divide by precision,
            # i.e. scale each gradient by the belief variance
            dF = dF / (self.state.belief_precisions + epsilon)

            # Generalized motion: apply the shift operator D to each row
            dstates = np.dot(states, self.D.T)

            # Update velocities (momentum)
            velocities = beta1 * velocities + (1 - beta1) * (dstates - self.alpha * dF)

            # Update the squared-gradient average (RMSprop)
            grad_sq_avg = beta2 * grad_sq_avg + (1 - beta2) * dF**2

            # Compute effective step size
            effective_lr = self.dt / (np.sqrt(grad_sq_avg) + epsilon)

            # Update states
            states += effective_lr * velocities

        # Recompute free energy so the returned value matches the final beliefs
        F = self._compute_free_energy(obs, states, action)

        self.state.belief_means = states
        return states, F
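
    # Update rule implemented above (sketch, per belief array mu):
    #
    #     g  = (dF/dmu) / pi                      (precision-weighted gradient)
    #     m  = beta1 * m + (1 - beta1) * (mu @ D.T - alpha * g)
    #     s  = beta2 * s + (1 - beta2) * g**2
    #     mu = mu + dt / (sqrt(s) + eps) * m
    #
    # i.e. the generalized descent dmu/dt = D mu - dF/dmu, integrated with
    # momentum and an RMSprop-style adaptive step size.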

    def _compute_expected_free_energy(self,
                                      states: np.ndarray,
                                      action: np.ndarray) -> float:
        """Compute expected free energy for action selection.

        G = E_q(s')[F(s') + KL[q(s')||p(s')]] - H[q(o'|s')]

        This includes:
        1. Expected accuracy
        2. Expected complexity
        3. Expected information gain
        4. Goal-directed behavior
        """
        # Predict next state
        flow = self._flow_mapping(states, action)
        next_states = states + self.dt * flow

        # 1. Expected accuracy
        pred_obs = self._sensory_mapping(next_states)
        # Use current observation as proxy for desired observation
        desired_obs = self._sensory_mapping(states)  # Could be replaced with actual goals
        expected_accuracy = 0.5 * np.sum((pred_obs - desired_obs)**2)

        # 2. Expected complexity
        expected_complexity = 0.5 * np.sum(next_states[:, 0]**2 + np.log(2*np.pi))

        # 3. Expected information gain
        # Approximate using entropy of predicted observations
        pred_var = 1.0 / (self.state.belief_precisions[:, 0] + 1e-8)
        information_gain = 0.5 * np.sum(np.log(2*np.pi*np.e*pred_var))

        # 4. Goal-directed term
        # Add specific goals here if available
        goal_term = 0.0

        # Combine terms with weights
        w_accuracy = 1.0
        w_complexity = 0.1
        w_info_gain = 0.5
        w_goal = 1.0

        G = (w_accuracy * expected_accuracy +
             w_complexity * expected_complexity -
             w_info_gain * information_gain +
             w_goal * goal_term)

        return G
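
    # Weighted objective (sketch): with the weights above,
    #
    #     G = 1.0 * accuracy + 0.1 * complexity - 0.5 * info_gain + 1.0 * goal
    #
    # so low-G actions keep predicted observations close to the desired ones
    # while favouring states whose observations remain uncertain (high
    # information gain lowers G).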

    def _select_action(self, obs: np.ndarray) -> np.ndarray:
        """Select action by minimizing expected free energy.

        This implementation uses:
        1. Random-shooting search over sampled action candidates
        2. Forward rollouts over a short planning horizon
        3. Discounted accumulation of expected free energy
        4. Precision-scaled exploration noise
        """
        n_candidates = 10  # Number of action candidates
        horizon = 5  # Planning horizon

        # Initialize action candidates
        best_action = np.zeros(self.n_states)
        best_value = np.inf
        action_std = np.sqrt(1.0 / (self.sigma + 1e-8))  # Exploration noise

        # Random-shooting search: sample candidates and roll each one forward
        for _ in range(n_candidates):
            # Sample a constant action for the whole rollout
            action = np.random.normal(0, action_std, size=self.n_states)
            total_value = 0

            # Simulate forward
            states = self.state.belief_means.copy()
            for t in range(horizon):
                # Compute value for this step
                value = self._compute_expected_free_energy(states, action)
                total_value += value * (0.9**t)  # Discount factor

                # Simulate forward
                flow = self._flow_mapping(states, action)
                states += self.dt * flow

            # Update best action
            if total_value < best_value:
                best_value = total_value
                best_action = action

        # Add exploration noise based on uncertainty
        exploration_noise = np.random.normal(
            0,
            1.0 / (self.sigma * np.sqrt(self.state.belief_precisions[:, 0] + 1e-8))
        )
        best_action += exploration_noise

        return best_action
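
    # Scoring rule (sketch): each sampled candidate a is evaluated as
    #
    #     J(a) = sum_{t=0..horizon-1} 0.9**t * G(s_t, a),
    #     s_{t+1} = s_t + dt * f(s_t, a)
    #
    # and the lowest-scoring candidate is returned, with exploration noise
    # scaled by the current belief uncertainty.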

    def step(self, obs: np.ndarray) -> Tuple[np.ndarray, float]:
        """Take a step in the environment.

        Args:
            obs: Observation

        Returns:
            Tuple of (action, free energy)
        """
        # Update beliefs
        _, free_energy = self._update_beliefs(obs)

        # Select action
        action = self._select_action(obs)

        # Update state
        self.state.update(obs, action, free_energy)

        return action, free_energy
    def save_state(self, path: Union[str, Path]) -> None:
        """Save current state to file."""
        path = Path(path)
        state_dict = {
            'belief_means': self.state.belief_means.tolist(),
            'belief_precisions': self.state.belief_precisions.tolist(),
            'time': float(self.state.time),
            'dt': float(self.state.dt),
            'true_state': None if self.state.true_state is None else self.state.true_state.tolist(),
            'history': {
                'observations': [o.tolist() for o in self.state.history['observations']],
                'actions': [a.tolist() for a in self.state.history['actions']],
                'belief_means': [m.tolist() for m in self.state.history['belief_means']],
                'belief_precisions': [p.tolist() for p in self.state.history['belief_precisions']],
                'free_energy': self.state.history['free_energy'],
                'time': self.state.history['time'],
                'true_state': [s.tolist() for s in self.state.history['true_state']] if self.state.history['true_state'] else []
            }
        }

        with open(path, 'w') as f:
            json.dump(state_dict, f, indent=2)

    def load_state(self, path: Union[str, Path]) -> None:
        """Load state from file."""
        path = Path(path)
        with open(path, 'r') as f:
            state_dict = json.load(f)

        self.state = ContinuousState(
            belief_means=np.array(state_dict['belief_means']),
            belief_precisions=np.array(state_dict['belief_precisions']),
            time=float(state_dict['time']),
            dt=float(state_dict['dt']),
            true_state=None if state_dict['true_state'] is None else np.array(state_dict['true_state'])
        )

        self.state.history = {
            'observations': [np.array(o) for o in state_dict['history']['observations']],
            'actions': [np.array(a) for a in state_dict['history']['actions']],
            'belief_means': [np.array(m) for m in state_dict['history']['belief_means']],
            'belief_precisions': [np.array(p) for p in state_dict['history']['belief_precisions']],
            'free_energy': state_dict['history']['free_energy'],
            'time': state_dict['history']['time'],
            'true_state': [np.array(s) for s in state_dict['history']['true_state']] if state_dict['history']['true_state'] else []
        }

    def reset(self) -> None:
        """Reset the agent to initial state."""
        self.state = ContinuousState(
            belief_means=np.zeros((self.n_states, self.n_orders)),
            belief_precisions=np.ones((self.n_states, self.n_orders)),
            dt=self.dt
        )
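

if __name__ == "__main__":
    # Minimal usage sketch. The toy environment below (a noisily observed,
    # damped latent state driven by the agent's action) is an illustrative
    # assumption, not part of the agent's generative model.
    rng = np.random.default_rng(0)
    agent = ContinuousActiveInference(n_states=2, n_obs=2, n_orders=3, dt=0.01)

    true_x = np.array([1.0, -0.5])
    for _ in range(100):
        # Observation: true state plus a little sensor noise
        obs = true_x + 0.05 * rng.standard_normal(agent.n_obs)
        action, free_energy = agent.step(obs)
        # Toy environment update: damped drift plus the agent's forcing
        true_x = true_x + agent.dt * (-true_x + action)

    logger.info("Steps taken: %d", len(agent.state.history['time']))
    logger.info("Final free energy: %.4f", free_energy)
    logger.info("Final belief means (order 0): %s", agent.state.belief_means[:, 0])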
