"""
Comprehensive test runner for the Generic POMDP implementation.
"""

import json
import logging
import sys
from datetime import datetime
from pathlib import Path
from typing import Callable, Dict, List

import numpy as np

from generic_pomdp import GenericPOMDP
from visualization import POMDPVisualizer
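# generic_pomdp and visualization are local modules assumed to sit next to
# this script: the POMDP model under test and its plotting helpers.
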
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('test_results.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

class TestRunner:
    """Comprehensive test runner with detailed logging."""

    def __init__(self):
        """Initialize test runner."""
        self.output_dir = Path("Output")
        self.output_dir.mkdir(exist_ok=True)

        self.results = {
            'timestamp': datetime.now().isoformat(),
            'tests': {},
            'metrics': {},
            'summary': {}
        }

    def run_all_tests(self):
        """Run all tests with comprehensive logging."""
        logger.info("Starting comprehensive test suite...")

        try:
            # Run initialization tests
            self._run_test_group("initialization", [
                self._test_model_initialization,
                self._test_matrix_properties,
                self._test_state_initialization
            ])

            # Run dynamics tests
            self._run_test_group("dynamics", [
                self._test_step_without_action,
                self._test_step_with_action,
                self._test_belief_updating,
                self._test_action_selection
            ])

            # Run learning tests
            self._run_test_group("learning", [
                self._test_learning_dynamics,
                self._test_preference_influence,
                self._test_convergence
            ])

            # Run numerical stability tests
            self._run_test_group("numerical_stability", [
                self._test_numerical_stability,
                self._test_edge_cases
            ])

            # Run persistence tests
            self._run_test_group("persistence", [
                self._test_save_load_state
            ])

            # Run visualization tests
            self._run_test_group("visualization", [
                self._test_visualization_outputs
            ])

            # Compute summary metrics
            self._compute_summary()

            # Save results
            self._save_results()

            logger.info("Test suite completed")
            # Report success only if every individual test passed, not merely
            # because the harness ran to completion.
            return self.results['summary']['failed_tests'] == 0

        except Exception as e:
            logger.error(f"Test suite failed: {str(e)}", exc_info=True)
            return False

    def _run_test_group(self, group_name: str, test_functions: List[Callable]):
        """Run a group of related tests.

        Args:
            group_name: Name of the test group
            test_functions: List of test functions to run
        """
        logger.info(f"Running {group_name} tests...")

        self.results['tests'][group_name] = {
            'passed': 0,
            'failed': 0,
            'details': {}
        }

        for test_func in test_functions:
            test_name = test_func.__name__
            logger.info(f"Running test: {test_name}")

            try:
                metrics = test_func()
                self.results['tests'][group_name]['passed'] += 1
                self.results['tests'][group_name]['details'][test_name] = {
                    'status': 'PASS',
                    'metrics': metrics
                }
                logger.info(f"Test {test_name} PASSED")

            except Exception as e:
                self.results['tests'][group_name]['failed'] += 1
                self.results['tests'][group_name]['details'][test_name] = {
                    'status': 'FAIL',
                    'error': str(e)
                }
                logger.error(f"Test {test_name} FAILED: {str(e)}", exc_info=True)

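    # Each group entry in self.results['tests'] ends up shaped like this
    # (illustrative values, not actual output):
    #
    #   "initialization": {
    #       "passed": 3,
    #       "failed": 0,
    #       "details": {
    #           "_test_model_initialization": {"status": "PASS", "metrics": {...}}
    #       }
    #   }
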
    def _test_model_initialization(self) -> Dict:
        """Test model initialization."""
        model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)

        metrics = {
            'matrix_shapes': {
                'A': model.A.shape == (2, 3),
                'B': model.B.shape == (3, 3, 2),
                'C': model.C.shape == (2, 5),
                'D': model.D.shape == (3,),
                'E': model.E.shape == (2,)
            },
            'initial_state': {
                'beliefs_shape': model.state.beliefs.shape == (3,),
                'time_step': model.state.time_step == 0
            }
        }

        assert all(metrics['matrix_shapes'].values()), "Invalid matrix shapes"
        assert all(metrics['initial_state'].values()), "Invalid initial state"

        return metrics

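    # Convention implied by the shape and stochasticity checks in this file:
    # A (observations x states) is the observation model, B (states x states
    # x actions) the transition model, C (observations x time_horizon) the
    # preferences (the (2, 5) check above implies a default horizon of 5),
    # and D and E are priors over states and actions respectively.
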
    def _test_matrix_properties(self) -> Dict:
        """Test matrix properties."""
        model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)

        metrics = {
            'A_matrix': {
                'column_stochastic': np.allclose(model.A.sum(axis=0), 1.0),
                'non_negative': np.all(model.A >= 0)
            },
            'B_matrix': {
                'action_0': {
                    'column_stochastic': np.allclose(model.B[:, :, 0].sum(axis=0), 1.0),
                    'non_negative': np.all(model.B[:, :, 0] >= 0)
                },
                'action_1': {
                    'column_stochastic': np.allclose(model.B[:, :, 1].sum(axis=0), 1.0),
                    'non_negative': np.all(model.B[:, :, 1] >= 0)
                }
            },
            'C_matrix': {
                'finite': np.all(np.isfinite(model.C))
            },
            'D_matrix': {
                'normalized': np.allclose(model.D.sum(), 1.0),
                'non_negative': np.all(model.D >= 0)
            },
            'E_matrix': {
                'normalized': np.allclose(model.E.sum(), 1.0),
                'non_negative': np.all(model.E >= 0)
            }
        }

        # Verify all properties
        assert all(metrics['A_matrix'].values()), "Invalid A matrix properties"
        assert all(metrics['B_matrix']['action_0'].values()) and \
               all(metrics['B_matrix']['action_1'].values()), "Invalid B matrix properties"
        assert all(metrics['C_matrix'].values()), "Invalid C matrix properties"
        assert all(metrics['D_matrix'].values()), "Invalid D matrix properties"
        assert all(metrics['E_matrix'].values()), "Invalid E matrix properties"

        return metrics

    def _test_state_initialization(self) -> Dict:
        """Test state initialization."""
        model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)

        metrics = {
            'beliefs': {
                'normalized': np.allclose(model.state.beliefs.sum(), 1.0),
                'non_negative': np.all(model.state.beliefs >= 0)
            },
            'history': {
                'observations_empty': len(model.state.history['observations']) == 0,
                'actions_empty': len(model.state.history['actions']) == 0,
                'beliefs_initial': len(model.state.history['beliefs']) == 1
            }
        }

        assert all(metrics['beliefs'].values()), "Invalid initial beliefs"
        assert all(metrics['history'].values()), "Invalid history initialization"

        return metrics

    def _test_step_without_action(self) -> Dict:
        """Test stepping without providing action."""
        model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)
        observation, free_energy = model.step()

        metrics = {
            'observation': {
                'valid_range': 0 <= observation < model.num_observations
            },
            'free_energy': {
                'finite': np.isfinite(free_energy)
            },
            'history': {
                'observations_updated': len(model.state.history['observations']) == 1,
                'actions_updated': len(model.state.history['actions']) == 1,
                'beliefs_updated': len(model.state.history['beliefs']) == 2
            }
        }

        assert all(metrics['observation'].values()), "Invalid observation"
        assert all(metrics['free_energy'].values()), "Invalid free energy"
        assert all(metrics['history'].values()), "Invalid history update"

        return metrics

    def _test_step_with_action(self) -> Dict:
        """Test stepping with provided action."""
        model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)
        action = 0
        observation, free_energy = model.step(action)

        metrics = {
            'observation': {
                'valid_range': 0 <= observation < model.num_observations
            },
            'free_energy': {
                'finite': np.isfinite(free_energy)
            },
            'action': {
                'correct_action': model.state.history['actions'][-1] == action
            }
        }

        assert all(metrics['observation'].values()), "Invalid observation"
        assert all(metrics['free_energy'].values()), "Invalid free energy"
        assert all(metrics['action'].values()), "Invalid action"

        return metrics

    def _test_belief_updating(self) -> Dict:
        """Test belief updating mechanism."""
        model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)
        initial_beliefs = model.state.beliefs.copy()
        model.step()

        metrics = {
            'belief_change': {
                'updated': not np.allclose(model.state.beliefs, initial_beliefs),
                'normalized': np.allclose(model.state.beliefs.sum(), 1.0),
                'non_negative': np.all(model.state.beliefs >= 0)
            }
        }

        assert all(metrics['belief_change'].values()), "Invalid belief update"

        return metrics

    def _test_action_selection(self) -> Dict:
        """Test action selection mechanism."""
        model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)
        action, probs = model._select_action()

        metrics = {
            'action': {
                'valid_range': 0 <= action < model.num_actions
            },
            'probabilities': {
                'normalized': np.allclose(probs.sum(), 1.0),
                'non_negative': np.all(probs >= 0),
                'correct_shape': len(probs) == model.num_actions
            }
        }

        assert all(metrics['action'].values()), "Invalid action"
        assert all(metrics['probabilities'].values()), "Invalid action probabilities"

        return metrics

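    # Belief entropy H(b) = -sum_i b_i * log(b_i) is used below as a proxy
    # for learning progress: as the agent accumulates evidence its beliefs
    # should sharpen, so mean entropy over the last five steps should fall
    # below that of the first five. model.stability_threshold guards the
    # log against zero-probability entries.
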
    def _test_learning_dynamics(self) -> Dict:
        """Test learning dynamics over multiple steps."""
        model = GenericPOMDP(num_observations=4, num_states=5, num_actions=3)

        # Track belief entropy
        entropies = []
        n_steps = 20

        for _ in range(n_steps):
            model.step()
            entropy = -np.sum(model.state.beliefs *
                              np.log(model.state.beliefs +
                                     model.stability_threshold))
            entropies.append(entropy)

        metrics = {
            'entropy': {
                'initial_mean': float(np.mean(entropies[:5])),
                'final_mean': float(np.mean(entropies[-5:])),
                'decreased': np.mean(entropies[:5]) > np.mean(entropies[-5:])
            }
        }

        assert metrics['entropy']['decreased'], "No learning progress observed"

        return metrics

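    # The next test wires up a model where preferences can actually steer
    # behaviour: A maps states 0 and 1 deterministically to observations 0
    # and 1, B makes action 0 stay put and action 1 cycle the states, and C
    # rewards observation 0 while penalising observation 1. Over enough
    # steps, observation 0 should dominate the trace.
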
    def _test_preference_influence(self) -> Dict:
        """Test influence of preferences on action selection."""
        model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)

        # Set strong preference for first observation
        model.C = np.zeros((model.num_observations, model.time_horizon))
        model.C[0, :] = 2.0   # Strong preference for first observation
        model.C[1, :] = -2.0  # Avoid second observation

        # Set deterministic observation model
        model.A = np.zeros((model.num_observations, model.num_states))
        model.A[0, 0] = 1.0  # First state -> first observation
        model.A[1, 1] = 1.0  # Second state -> second observation
        model.A[:, 2] = 0.5  # Third state -> random observation

        # Set controllable transition model
        model.B[:, :, 0] = np.eye(model.num_states)  # Action 0: stay in place
        model.B[:, :, 1] = np.roll(np.eye(model.num_states), 1, axis=1)  # Action 1: cycle states

        observations = []
        n_steps = 50  # More steps to ensure preference influence

        for _ in range(n_steps):
            obs, _ = model.step()
            observations.append(obs)

        # minlength guards the obs_counts[1] lookup below when observation 1
        # never occurs in the trace.
        obs_counts = np.bincount(observations, minlength=model.num_observations)

        metrics = {
            'observations': {
                'preferred_count': int(obs_counts[0]),
                'other_count': int(obs_counts[1]),
                'preference_followed': obs_counts[0] > obs_counts[1]
            }
        }

        assert metrics['observations']['preference_followed'], \
            "Preferences not influencing behavior"

        return metrics

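    # The next test probes numerical stability: the belief vector is rebuilt
    # from ~1e-10 entries and renormalised before stepping, the regime where
    # log-of-small-probability terms can overflow or produce NaNs.
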
    def _test_numerical_stability(self) -> Dict:
        """Test numerical stability of computations."""
        model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)

        # Set very small beliefs
        model.state.beliefs = np.ones(model.num_states) * 1e-10
        model.state.beliefs /= model.state.beliefs.sum()

        observation, free_energy = model.step()

        metrics = {
            'free_energy': {
                'finite': np.isfinite(free_energy)
            },
            'beliefs': {
                'finite': np.all(np.isfinite(model.state.beliefs)),
                'normalized': np.allclose(model.state.beliefs.sum(), 1.0),
                'non_negative': np.all(model.state.beliefs >= 0)
            }
        }

        assert all(metrics['free_energy'].values()), "Unstable free energy"
        assert all(metrics['beliefs'].values()), "Unstable beliefs"

        return metrics

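    # With identity transitions under action 0 and a (partially)
    # deterministic observation model, repeated evidence should pin the
    # belief down: successive belief vectors should change less and less,
    # which is what the shrinking norm differences below check.
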
    def _test_convergence(self) -> Dict:
        """Test convergence properties."""
        model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)

        # Set deterministic transition and observation
        model.B[:, :, 0] = np.eye(model.num_states)  # Identity transitions
        model.A = np.zeros((model.num_observations, model.num_states))
        model.A[0, 0] = 1.0  # First state -> first observation
        model.A[1, 1] = 1.0  # Second state -> second observation

        beliefs_history = []
        n_steps = 10

        for _ in range(n_steps):
            model.step(action=0)
            beliefs_history.append(model.state.beliefs.copy())

        # Compute belief changes
        diffs = [np.linalg.norm(b1 - b2)
                 for b1, b2 in zip(beliefs_history[:-1], beliefs_history[1:])]

        metrics = {
            'convergence': {
                'initial_changes': float(np.mean(diffs[:3])),
                'final_changes': float(np.mean(diffs[-3:])),
                'converging': np.mean(diffs[:3]) > np.mean(diffs[-3:])
            }
        }

        assert metrics['convergence']['converging'], "No convergence observed"

        return metrics

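    # Note: the round-trip check below compares beliefs, time step, and
    # history lengths only; the A-E matrices are assumed unchanged by
    # save/load and are not re-verified here.
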
    def _test_save_load_state(self) -> Dict:
        """Test state saving and loading."""
        model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)

        # Take some steps
        for _ in range(3):
            model.step()

        # Save state
        save_path = self.output_dir / "test_state.yaml"
        model.save_state(save_path)

        # Create new model and load state
        new_model = GenericPOMDP(
            num_observations=2,
            num_states=3,
            num_actions=2
        )
        new_model.load_state(save_path)

        metrics = {
            'state_match': {
                'beliefs': np.allclose(new_model.state.beliefs,
                                       model.state.beliefs),
                'time_step': new_model.state.time_step == model.state.time_step
            },
            'history_match': {
                'observations': len(new_model.state.history['observations']) ==
                                len(model.state.history['observations']),
                'actions': len(new_model.state.history['actions']) ==
                           len(model.state.history['actions']),
                'beliefs': len(new_model.state.history['beliefs']) ==
                           len(model.state.history['beliefs'])
            }
        }

        assert all(metrics['state_match'].values()), "State mismatch after loading"
        assert all(metrics['history_match'].values()), "History mismatch after loading"

        return metrics

    def _test_visualization_outputs(self) -> Dict:
        """Test visualization outputs."""
        model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)
        visualizer = POMDPVisualizer()

        # Take some steps
        for _ in range(5):
            model.step()

        # Generate visualizations
        visualizer.plot_belief_evolution(model.state.history['beliefs'])
        visualizer.plot_observation_matrix(model.A)
        visualizer.plot_preferences(model.C)

        metrics = {
            'files_generated': {
                'belief_evolution': (self.output_dir / "belief_evolution.png").exists(),
                'observation_matrix': (self.output_dir / "observation_matrix.png").exists(),
                'preferences': (self.output_dir / "preferences.png").exists()
            }
        }

        assert all(metrics['files_generated'].values()), "Missing visualization outputs"

        return metrics

    def _test_edge_cases(self) -> Dict:
        """Test edge cases and error handling."""
        metrics = {
            'invalid_dimensions': {
                'caught': False
            },
            'invalid_action': {
                'caught': False
            }
        }

        # Test invalid dimensions
        try:
            GenericPOMDP(num_observations=0, num_states=3, num_actions=2)
        except ValueError:
            metrics['invalid_dimensions']['caught'] = True

        # Test invalid action
        model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)
        try:
            model.step(action=model.num_actions)  # Invalid action
        except IndexError:
            metrics['invalid_action']['caught'] = True

        assert all(x['caught'] for x in metrics.values()), "Edge cases not handled"

        return metrics

    def _compute_summary(self):
        """Compute summary metrics for all tests."""
        total_tests = 0
        passed_tests = 0

        for group in self.results['tests'].values():
            total_tests += group['passed'] + group['failed']
            passed_tests += group['passed']

        self.results['summary'] = {
            'total_tests': total_tests,
            'passed_tests': passed_tests,
            'failed_tests': total_tests - passed_tests,
            'pass_rate': float(passed_tests) / total_tests if total_tests > 0 else 0.0
        }

        logger.info("Test Summary:")
        logger.info(f"  Total Tests: {total_tests}")
        logger.info(f"  Passed: {passed_tests}")
        logger.info(f"  Failed: {total_tests - passed_tests}")
        logger.info(f"  Pass Rate: {self.results['summary']['pass_rate']:.2%}")

    def _save_results(self):
        """Save test results to file."""
        results_file = self.output_dir / "test_results.json"

        # Metric values include NumPy scalars (e.g. np.bool_ from np.all),
        # which json.dump rejects; convert them to native Python scalars.
        with open(results_file, 'w') as f:
            json.dump(self.results, f, indent=2,
                      default=lambda o: o.item() if isinstance(o, np.generic) else str(o))

        logger.info(f"Test results saved to {results_file}")

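# Optional pytest hook (a sketch, not part of the original runner): the
# module is written as a standalone script, but this plain assertion lets
# pytest collect and drive the same suite.
def test_suite_passes():
    assert TestRunner().run_all_tests(), "POMDP test suite reported failures"

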
def main():
    """Main entry point."""
    runner = TestRunner()
    success = runner.run_all_tests()
    return 0 if success else 1


if __name__ == "__main__":
    sys.exit(main())
