cognitive/Things/Generic_POMDP/test_runner.py
Daniel Ari Friedman 6caa1a7cb1 Update
2025-02-07 08:16:25 -08:00

568 строки
20 KiB
Python

"""
Comprehensive test runner for Generic POMDP implementation.
"""
import pytest
import numpy as np
import logging
import json
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional
from generic_pomdp import GenericPOMDP
from visualization import POMDPVisualizer
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler('test_results.log'),
logging.StreamHandler()
]
)
logger = logging.getLogger(__name__)
class TestRunner:
"""Comprehensive test runner with detailed logging."""
def __init__(self):
"""Initialize test runner."""
self.output_dir = Path("Output")
self.output_dir.mkdir(exist_ok=True)
self.results = {
'timestamp': datetime.now().isoformat(),
'tests': {},
'metrics': {},
'summary': {}
}
def run_all_tests(self):
"""Run all tests with comprehensive logging."""
logger.info("Starting comprehensive test suite...")
try:
# Run initialization tests
self._run_test_group("initialization", [
self._test_model_initialization,
self._test_matrix_properties,
self._test_state_initialization
])
# Run dynamics tests
self._run_test_group("dynamics", [
self._test_step_without_action,
self._test_step_with_action,
self._test_belief_updating,
self._test_action_selection
])
# Run learning tests
self._run_test_group("learning", [
self._test_learning_dynamics,
self._test_preference_influence,
self._test_convergence
])
# Run numerical stability tests
self._run_test_group("numerical_stability", [
self._test_numerical_stability,
self._test_edge_cases
])
# Run persistence tests
self._run_test_group("persistence", [
self._test_save_load_state
])
# Run visualization tests
self._run_test_group("visualization", [
self._test_visualization_outputs
])
# Compute summary metrics
self._compute_summary()
# Save results
self._save_results()
logger.info("Test suite completed successfully")
return True
except Exception as e:
logger.error(f"Test suite failed: {str(e)}", exc_info=True)
return False
def _run_test_group(self, group_name: str, test_functions: List[callable]):
"""Run a group of related tests.
Args:
group_name: Name of the test group
test_functions: List of test functions to run
"""
logger.info(f"Running {group_name} tests...")
self.results['tests'][group_name] = {
'passed': 0,
'failed': 0,
'details': {}
}
for test_func in test_functions:
test_name = test_func.__name__
logger.info(f"Running test: {test_name}")
try:
metrics = test_func()
self.results['tests'][group_name]['passed'] += 1
self.results['tests'][group_name]['details'][test_name] = {
'status': 'PASS',
'metrics': metrics
}
logger.info(f"Test {test_name} PASSED")
except Exception as e:
self.results['tests'][group_name]['failed'] += 1
self.results['tests'][group_name]['details'][test_name] = {
'status': 'FAIL',
'error': str(e)
}
logger.error(f"Test {test_name} FAILED: {str(e)}", exc_info=True)
def _test_model_initialization(self) -> Dict:
"""Test model initialization."""
model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)
metrics = {
'matrix_shapes': {
'A': model.A.shape == (2, 3),
'B': model.B.shape == (3, 3, 2),
'C': model.C.shape == (2, 5),
'D': model.D.shape == (3,),
'E': model.E.shape == (2,)
},
'initial_state': {
'beliefs_shape': model.state.beliefs.shape == (3,),
'time_step': model.state.time_step == 0
}
}
assert all(metrics['matrix_shapes'].values()), "Invalid matrix shapes"
assert all(metrics['initial_state'].values()), "Invalid initial state"
return metrics
def _test_matrix_properties(self) -> Dict:
"""Test matrix properties."""
model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)
metrics = {
'A_matrix': {
'column_stochastic': np.allclose(model.A.sum(axis=0), 1.0),
'non_negative': np.all(model.A >= 0)
},
'B_matrix': {
'action_0': {
'column_stochastic': np.allclose(model.B[:,:,0].sum(axis=0), 1.0),
'non_negative': np.all(model.B[:,:,0] >= 0)
},
'action_1': {
'column_stochastic': np.allclose(model.B[:,:,1].sum(axis=0), 1.0),
'non_negative': np.all(model.B[:,:,1] >= 0)
}
},
'C_matrix': {
'finite': np.all(np.isfinite(model.C))
},
'D_matrix': {
'normalized': np.allclose(model.D.sum(), 1.0),
'non_negative': np.all(model.D >= 0)
},
'E_matrix': {
'normalized': np.allclose(model.E.sum(), 1.0),
'non_negative': np.all(model.E >= 0)
}
}
# Verify all properties
assert all(metrics['A_matrix'].values()), "Invalid A matrix properties"
assert all(metrics['B_matrix']['action_0'].values()) and \
all(metrics['B_matrix']['action_1'].values()), "Invalid B matrix properties"
assert all(metrics['C_matrix'].values()), "Invalid C matrix properties"
assert all(metrics['D_matrix'].values()), "Invalid D matrix properties"
assert all(metrics['E_matrix'].values()), "Invalid E matrix properties"
return metrics
def _test_state_initialization(self) -> Dict:
"""Test state initialization."""
model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)
metrics = {
'beliefs': {
'normalized': np.allclose(model.state.beliefs.sum(), 1.0),
'non_negative': np.all(model.state.beliefs >= 0)
},
'history': {
'observations_empty': len(model.state.history['observations']) == 0,
'actions_empty': len(model.state.history['actions']) == 0,
'beliefs_initial': len(model.state.history['beliefs']) == 1
}
}
assert all(metrics['beliefs'].values()), "Invalid initial beliefs"
assert all(metrics['history'].values()), "Invalid history initialization"
return metrics
def _test_step_without_action(self) -> Dict:
"""Test stepping without providing action."""
model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)
observation, free_energy = model.step()
metrics = {
'observation': {
'valid_range': 0 <= observation < model.num_observations
},
'free_energy': {
'finite': np.isfinite(free_energy)
},
'history': {
'observations_updated': len(model.state.history['observations']) == 1,
'actions_updated': len(model.state.history['actions']) == 1,
'beliefs_updated': len(model.state.history['beliefs']) == 2
}
}
assert all(metrics['observation'].values()), "Invalid observation"
assert all(metrics['free_energy'].values()), "Invalid free energy"
assert all(metrics['history'].values()), "Invalid history update"
return metrics
def _test_step_with_action(self) -> Dict:
"""Test stepping with provided action."""
model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)
action = 0
observation, free_energy = model.step(action)
metrics = {
'observation': {
'valid_range': 0 <= observation < model.num_observations
},
'free_energy': {
'finite': np.isfinite(free_energy)
},
'action': {
'correct_action': model.state.history['actions'][-1] == action
}
}
assert all(metrics['observation'].values()), "Invalid observation"
assert all(metrics['free_energy'].values()), "Invalid free energy"
assert all(metrics['action'].values()), "Invalid action"
return metrics
def _test_belief_updating(self) -> Dict:
"""Test belief updating mechanism."""
model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)
initial_beliefs = model.state.beliefs.copy()
model.step()
metrics = {
'belief_change': {
'updated': not np.allclose(model.state.beliefs, initial_beliefs),
'normalized': np.allclose(model.state.beliefs.sum(), 1.0),
'non_negative': np.all(model.state.beliefs >= 0)
}
}
assert all(metrics['belief_change'].values()), "Invalid belief update"
return metrics
def _test_action_selection(self) -> Dict:
"""Test action selection mechanism."""
model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)
action, probs = model._select_action()
metrics = {
'action': {
'valid_range': 0 <= action < model.num_actions
},
'probabilities': {
'normalized': np.allclose(probs.sum(), 1.0),
'non_negative': np.all(probs >= 0),
'correct_shape': len(probs) == model.num_actions
}
}
assert all(metrics['action'].values()), "Invalid action"
assert all(metrics['probabilities'].values()), "Invalid action probabilities"
return metrics
def _test_learning_dynamics(self) -> Dict:
"""Test learning dynamics over multiple steps."""
model = GenericPOMDP(num_observations=4, num_states=5, num_actions=3)
# Track belief entropy
entropies = []
n_steps = 20
for _ in range(n_steps):
model.step()
entropy = -np.sum(model.state.beliefs *
np.log(model.state.beliefs +
model.stability_threshold))
entropies.append(entropy)
metrics = {
'entropy': {
'initial_mean': float(np.mean(entropies[:5])),
'final_mean': float(np.mean(entropies[-5:])),
'decreased': np.mean(entropies[:5]) > np.mean(entropies[-5:])
}
}
assert metrics['entropy']['decreased'], "No learning progress observed"
return metrics
def _test_preference_influence(self) -> Dict:
"""Test influence of preferences on action selection."""
model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)
# Set strong preference for first observation
model.C = np.zeros((model.num_observations, model.time_horizon))
model.C[0,:] = 2.0 # Strong preference for first observation
model.C[1,:] = -2.0 # Avoid second observation
# Set deterministic observation model
model.A = np.zeros((model.num_observations, model.num_states))
model.A[0,0] = 1.0 # First state -> first observation
model.A[1,1] = 1.0 # Second state -> second observation
model.A[:,2] = 0.5 # Third state -> random observation
# Set controllable transition model
model.B[:,:,0] = np.eye(model.num_states) # Action 0: stay in place
model.B[:,:,1] = np.roll(np.eye(model.num_states), 1, axis=1) # Action 1: cycle states
observations = []
n_steps = 50 # More steps to ensure preference influence
for _ in range(n_steps):
obs, _ = model.step()
observations.append(obs)
obs_counts = np.bincount(observations)
metrics = {
'observations': {
'preferred_count': int(obs_counts[0]),
'other_count': int(obs_counts[1]),
'preference_followed': obs_counts[0] > obs_counts[1]
}
}
assert metrics['observations']['preference_followed'], \
"Preferences not influencing behavior"
return metrics
def _test_numerical_stability(self) -> Dict:
"""Test numerical stability of computations."""
model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)
# Set very small beliefs
model.state.beliefs = np.ones(model.num_states) * 1e-10
model.state.beliefs /= model.state.beliefs.sum()
observation, free_energy = model.step()
metrics = {
'free_energy': {
'finite': np.isfinite(free_energy)
},
'beliefs': {
'finite': np.all(np.isfinite(model.state.beliefs)),
'normalized': np.allclose(model.state.beliefs.sum(), 1.0),
'non_negative': np.all(model.state.beliefs >= 0)
}
}
assert all(metrics['free_energy'].values()), "Unstable free energy"
assert all(metrics['beliefs'].values()), "Unstable beliefs"
return metrics
def _test_convergence(self) -> Dict:
"""Test convergence properties."""
model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)
# Set deterministic transition and observation
model.B[:,:,0] = np.eye(model.num_states) # Identity transitions
model.A = np.zeros((model.num_observations, model.num_states))
model.A[0,0] = 1.0 # First state -> first observation
model.A[1,1] = 1.0 # Second state -> second observation
beliefs_history = []
n_steps = 10
for _ in range(n_steps):
model.step(action=0)
beliefs_history.append(model.state.beliefs.copy())
# Compute belief changes
diffs = [np.linalg.norm(b1 - b2)
for b1, b2 in zip(beliefs_history[:-1], beliefs_history[1:])]
metrics = {
'convergence': {
'initial_changes': float(np.mean(diffs[:3])),
'final_changes': float(np.mean(diffs[-3:])),
'converging': np.mean(diffs[:3]) > np.mean(diffs[-3:])
}
}
assert metrics['convergence']['converging'], "No convergence observed"
return metrics
def _test_save_load_state(self) -> Dict:
"""Test state saving and loading."""
model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)
# Take some steps
for _ in range(3):
model.step()
# Save state
save_path = self.output_dir / "test_state.yaml"
model.save_state(save_path)
# Create new model and load state
new_model = GenericPOMDP(
num_observations=2,
num_states=3,
num_actions=2
)
new_model.load_state(save_path)
metrics = {
'state_match': {
'beliefs': np.allclose(new_model.state.beliefs,
model.state.beliefs),
'time_step': new_model.state.time_step == model.state.time_step
},
'history_match': {
'observations': len(new_model.state.history['observations']) ==
len(model.state.history['observations']),
'actions': len(new_model.state.history['actions']) ==
len(model.state.history['actions']),
'beliefs': len(new_model.state.history['beliefs']) ==
len(model.state.history['beliefs'])
}
}
assert all(metrics['state_match'].values()), "State mismatch after loading"
assert all(metrics['history_match'].values()), "History mismatch after loading"
return metrics
def _test_visualization_outputs(self) -> Dict:
"""Test visualization outputs."""
model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)
visualizer = POMDPVisualizer()
# Take some steps
for _ in range(5):
model.step()
# Generate visualizations
visualizer.plot_belief_evolution(model.state.history['beliefs'])
visualizer.plot_observation_matrix(model.A)
visualizer.plot_preferences(model.C)
metrics = {
'files_generated': {
'belief_evolution': (self.output_dir / "belief_evolution.png").exists(),
'observation_matrix': (self.output_dir / "observation_matrix.png").exists(),
'preferences': (self.output_dir / "preferences.png").exists()
}
}
assert all(metrics['files_generated'].values()), "Missing visualization outputs"
return metrics
def _test_edge_cases(self) -> Dict:
"""Test edge cases and error handling."""
metrics = {
'invalid_dimensions': {
'caught': False
},
'invalid_action': {
'caught': False
}
}
# Test invalid dimensions
try:
GenericPOMDP(num_observations=0, num_states=3, num_actions=2)
except ValueError:
metrics['invalid_dimensions']['caught'] = True
# Test invalid action
model = GenericPOMDP(num_observations=2, num_states=3, num_actions=2)
try:
model.step(action=model.num_actions) # Invalid action
except IndexError:
metrics['invalid_action']['caught'] = True
assert all(x['caught'] for x in metrics.values()), "Edge cases not handled"
return metrics
def _compute_summary(self):
"""Compute summary metrics for all tests."""
total_tests = 0
passed_tests = 0
for group in self.results['tests'].values():
total_tests += group['passed'] + group['failed']
passed_tests += group['passed']
self.results['summary'] = {
'total_tests': total_tests,
'passed_tests': passed_tests,
'failed_tests': total_tests - passed_tests,
'pass_rate': float(passed_tests) / total_tests if total_tests > 0 else 0.0
}
logger.info("Test Summary:")
logger.info(f" Total Tests: {total_tests}")
logger.info(f" Passed: {passed_tests}")
logger.info(f" Failed: {total_tests - passed_tests}")
logger.info(f" Pass Rate: {self.results['summary']['pass_rate']:.2%}")
def _save_results(self):
"""Save test results to file."""
results_file = self.output_dir / "test_results.json"
with open(results_file, 'w') as f:
json.dump(self.results, f, indent=2)
logger.info(f"Test results saved to {results_file}")
def main():
"""Main entry point."""
runner = TestRunner()
success = runner.run_all_tests()
return 0 if success else 1
if __name__ == "__main__":
main()