1. Vacuum Cleaner Learning Agent
Source Code
import numpy as np

class VacuumLearningAgent:
    def __init__(self, locations, actions):
        self.locations = locations
        self.actions = actions
        self.q_table = {}  # maps state tuples to {action: Q-value}

    def perceive(self, environment):
        return environment

    def choose_action(self, state):
        state_tuple = tuple(state)
        if state_tuple not in self.q_table:
            self.q_table[state_tuple] = {action: 0 for action in self.actions}
        # Exploration-exploitation trade-off (epsilon = 0.2)
        if np.random.uniform(0, 1) < 0.2:
            action = np.random.choice(self.actions)  # Explore
        else:
            action = max(self.q_table[state_tuple], key=self.q_table[state_tuple].get)  # Exploit
        return action

    def learn(self, state, action, reward, next_state):
        state_tuple = tuple(state)
        next_state_tuple = tuple(next_state)
        if state_tuple not in self.q_table:
            self.q_table[state_tuple] = {a: 0 for a in self.actions}
        if next_state_tuple not in self.q_table:
            self.q_table[next_state_tuple] = {a: 0 for a in self.actions}
        # Q-learning update formula (learning rate 0.1, discount factor 0.9)
        self.q_table[state_tuple][action] += 0.1 * (
            reward
            + 0.9 * max(self.q_table[next_state_tuple].values())
            - self.q_table[state_tuple][action]
        )

    def act(self, percept):
        state = percept['state']
        action = self.choose_action(state)
        print(f"Agent performs action: {action}")
        return action

class VacuumEnvironment:
    def __init__(self, locations):
        self.locations = locations
        self.states = [(loc, 'clean') for loc in locations]

    def update_state(self, location, status):
        index = self.locations.index(location)
        self.states[index] = (location, status)

    def get_reward(self, location, action):
        if action == 'clean':
            return 1 if self.states[self.locations.index(location)][1] == 'dirty' else -1
        else:
            return 0  # No reward for moving

# Demonstrate the working of the learning agent for the vacuum cleaner
locations = ['A', 'B']
actions = ['clean', 'move_right', 'move_left']
agent = VacuumLearningAgent(locations=locations, actions=actions)
environment = VacuumEnvironment(locations=locations)
for _ in range(5):
    # Copy the state list so the percept is not aliased to the mutable
    # environment state; otherwise state and next_state passed to learn()
    # would be the same object after update_state mutates it
    percept = {'state': list(environment.states)}
    action = agent.act(percept)
    # Update the environment state
    location = input("Enter the location (A/B): ").upper()
    status = input("Enter the status (clean/dirty): ").lower()
    environment.update_state(location, status)
    # Provide the agent with a reward and update its Q-values
    reward = environment.get_reward(location, action)
    agent.learn(percept['state'], action, reward, environment.states)
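The line tagged "Q-learning update formula" above implements the standard Q-learning rule, with the constants hard-coded in the listing: learning rate α = 0.1, discount factor γ = 0.9, and ε-greedy action selection with ε = 0.2. In standard notation:

$$Q(s,a) \leftarrow Q(s,a) + \alpha \left[ r + \gamma \max_{a'} Q(s',a') - Q(s,a) \right]$$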
Output
Agent performs action: move_right
Enter the location (A/B): A
Enter the status (clean/dirty): dirty
Agent performs action: clean
Enter the location (A/B): A
Enter the status (clean/dirty): clean
Agent performs action: move_right
Enter the location (A/B): B
Enter the status (clean/dirty): dirty
Agent performs action: move_left
Enter the location (A/B): A
Enter the status (clean/dirty): clean
Agent performs action: clean
Enter the location (A/B): B
Enter the status (clean/dirty): dirty
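For longer training runs, the manual input() prompts become impractical. Below is a minimal non-interactive sketch, assuming the classes and imports above are in scope; the names auto_agent and auto_env, the uniform random dirt, and the 100-step count are illustrative choices, not part of the original program.

# Hypothetical automated driver for the classes above: dirt appears at
# random instead of being entered by hand (uniform clean/dirty choice
# and 100 steps are arbitrary)
auto_agent = VacuumLearningAgent(locations=locations, actions=actions)
auto_env = VacuumEnvironment(locations=locations)
for _ in range(100):
    percept = {'state': list(auto_env.states)}
    action = auto_agent.act(percept)
    # Randomly re-dirty or clean one location, standing in for the manual prompts
    location = np.random.choice(locations)
    status = np.random.choice(['clean', 'dirty'])
    auto_env.update_state(location, status)
    reward = auto_env.get_reward(location, action)
    auto_agent.learn(percept['state'], action, reward, auto_env.states)
print(auto_agent.q_table)  # inspect the learned Q-values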
2. Prime Number Learning Agent
Source Code
import numpy as np

class PrimeNumberLearningAgent:
    def __init__(self, actions):
        self.actions = actions
        self.q_table = {}  # maps each integer (state) to {action: Q-value}

    def choose_action(self, state):
        if state not in self.q_table:
            self.q_table[state] = {action: 0 for action in self.actions}
        # Exploration-exploitation trade-off (epsilon = 0.2)
        if np.random.uniform(0, 1) < 0.2:
            action = np.random.choice(self.actions)  # Explore
        else:
            action = max(self.q_table[state], key=self.q_table[state].get)  # Exploit
        return action

    def learn(self, state, action, reward, next_state):
        if state not in self.q_table:
            self.q_table[state] = {a: 0 for a in self.actions}
        if next_state not in self.q_table:
            self.q_table[next_state] = {a: 0 for a in self.actions}
        # Q-learning update formula (learning rate 0.1, discount factor 0.9)
        self.q_table[state][action] += 0.1 * (
            reward
            + 0.9 * max(self.q_table[next_state].values())
            - self.q_table[state][action]
        )

    def act(self, state):
        action = self.choose_action(state)
        print(f"Agent predicts: {action}")
        return action

class PrimeNumberEnvironment:
    def is_prime(self, number):
        # Trial division up to the square root of the number
        if number < 2:
            return False
        for i in range(2, int(number**0.5) + 1):
            if number % i == 0:
                return False
        return True

    def get_reward(self, number, action):
        if action == 'is_prime':
            return 1 if self.is_prime(number) else -1
        elif action == 'not_prime':
            return 1 if not self.is_prime(number) else -1
        else:
            return 0

# Demonstrate the working of the learning agent for prime numbers
actions = ['is_prime', 'not_prime']
agent = PrimeNumberLearningAgent(actions=actions)
environment = PrimeNumberEnvironment()
for _ in range(10):
    number = int(input("Enter a positive integer: "))
    action = agent.act(number)
    # Provide the agent with a reward and update its Q-values
    # (each number is its own state, so next_state is the same number)
    reward = environment.get_reward(number, action)
    agent.learn(number, action, reward, number)
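Note that the demo calls learn with next_state equal to the current number, so each prediction is a one-step episode that bootstraps from the same state's own value:

$$Q(n,a) \leftarrow Q(n,a) + \alpha \left[ r + \gamma \max_{a'} Q(n,a') - Q(n,a) \right]$$

with α = 0.1 and γ = 0.9 as in the code.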
Output
Enter a positive integer: 1
Agent predicts: is_prime
Enter a positive integer: 2
Agent predicts: is_prime
Enter a positive integer: 3
Agent predicts: not_prime
Enter a positive integer: 4
Agent predicts: is_prime
Enter a positive integer: 5
Agent predicts: is_prime
Enter a positive integer: 6
Agent predicts: is_prime
Enter a positive integer: 7
Agent predicts: is_prime
Enter a positive integer: 8
Agent predicts: not_prime
Enter a positive integer: 9
Agent predicts: is_prime
Enter a positive integer: 10
Agent predicts: is_prime
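The transcript shows that early predictions are close to random: the agent must first be rewarded on a number before its Q-values for that state become informative. Below is a minimal non-interactive training sketch, assuming the classes above are in scope; the names trainer and checker, the range 1..10, and the 200-pass count are illustrative choices, not part of the original program.

# Hypothetical automated trainer: visit each state many times so the
# greedy action can converge (200 passes over 1..10 is arbitrary)
trainer = PrimeNumberLearningAgent(actions=actions)
checker = PrimeNumberEnvironment()
for _ in range(200):
    for number in range(1, 11):
        action = trainer.choose_action(number)  # choose silently, without act()'s print
        reward = checker.get_reward(number, action)
        trainer.learn(number, action, reward, number)
# After many visits to each state, the greedy action typically matches primality
for number in range(1, 11):
    best = max(trainer.q_table[number], key=trainer.q_table[number].get)
    print(number, best, checker.is_prime(number))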