Learning Agents

1. Vacuum Cleaner Learning Agent

This agent uses tabular Q-learning in a two-location world: at each step it chooses to clean or move, receives a reward from the environment, and updates its action values accordingly.

Source Code

import numpy as np

class VacuumLearningAgent:
    def __init__(self, locations, actions):
        self.locations = locations
        self.actions = actions
        self.q_table = {}

    def perceive(self, environment):
        # The percept is simply the full environment state in this simple world
        return environment

    def choose_action(self, state):
        state_tuple = tuple(state)  # lists are unhashable, so key the Q-table by tuple
        
        if state_tuple not in self.q_table:
            self.q_table[state_tuple] = {action: 0 for action in self.actions}

        # Exploration-exploitation trade-off (epsilon = 0.2)
        if np.random.uniform(0, 1) < 0.2:
            action = np.random.choice(self.actions)  # Explore: random action
        else:
            action = max(self.q_table[state_tuple], key=self.q_table[state_tuple].get)  # Exploit: best known action

        return action

    def learn(self, state, action, reward, next_state):
        state_tuple = tuple(state)
        next_state_tuple = tuple(next_state)

        if state_tuple not in self.q_table:
            self.q_table[state_tuple] = {a: 0 for a in self.actions}

        if next_state_tuple not in self.q_table:
            self.q_table[next_state_tuple] = {a: 0 for a in self.actions}

        # Q-learning update: Q(s,a) += alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))
        alpha, gamma = 0.1, 0.9  # learning rate and discount factor
        best_next = max(self.q_table[next_state_tuple].values())
        self.q_table[state_tuple][action] += alpha * (reward + gamma * best_next - self.q_table[state_tuple][action])

    def act(self, percept):
        state = percept['state']
        action = self.choose_action(state)
        print(f"Agent performs action: {action}")
        return action


class VacuumEnvironment:
    def __init__(self, locations):
        self.locations = locations
        self.states = [(loc, 'clean') for loc in locations]

    def update_state(self, location, status):
        index = self.locations.index(location)
        self.states[index] = (location, status)

    def get_reward(self, location, action):
        if action == 'clean':
            return 1 if self.states[self.locations.index(location)][1] == 'dirty' else -1
        else:
            return 0  # No reward for moving

# Demonstrate the working of the learning agent for the vacuum cleaner
locations = ['A', 'B']
actions = ['clean', 'move_right', 'move_left']
agent = VacuumLearningAgent(locations=locations, actions=actions)
environment = VacuumEnvironment(locations=locations)

for _ in range(5):
    # Copy the state list: environment.states is mutated below, and without
    # a copy the "previous" state passed to learn() would silently change too
    percept = {'state': list(environment.states)}
    action = agent.act(percept)

    # Update the environment state from user input
    location = input("Enter the location (A/B): ").upper()
    status = input("Enter the status (clean/dirty): ").lower()
    environment.update_state(location, status)

    # Provide the agent with a reward and update its Q-values
    reward = environment.get_reward(location, action)
    agent.learn(percept['state'], action, reward, environment.states)
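
For reference, the update implemented in learn is the standard tabular Q-learning rule, with learning rate \alpha = 0.1, discount factor \gamma = 0.9, and exploration rate \epsilon = 0.2 hard-coded above:

    Q(s, a) \leftarrow Q(s, a) + \alpha \left[ r + \gamma \max_{a'} Q(s', a') - Q(s, a) \right]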

Output

Agent performs action: move_right
Enter the location (A/B): A
Enter the status (clean/dirty): dirty
Agent performs action: clean
Enter the location (A/B): A
Enter the status (clean/dirty): clean
Agent performs action: move_right
Enter the location (A/B): B
Enter the status (clean/dirty): dirty
Agent performs action: move_left
Enter the location (A/B): A
Enter the status (clean/dirty): clean
Agent performs action: clean
Enter the location (A/B): B
Enter the status (clean/dirty): dirty
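
The interactive demo runs only five steps, so the Q-table barely changes. A minimal non-interactive sketch like the following (assuming the VacuumLearningAgent and VacuumEnvironment classes above are in scope; the random-dirt loop and step count are illustrative choices, not part of the original program) shows the value of 'clean' growing relative to the movement actions:

import numpy as np

# Non-interactive training sketch: dirt appears at random and the
# agent trains for many steps without user input.
agent = VacuumLearningAgent(locations=['A', 'B'],
                            actions=['clean', 'move_right', 'move_left'])
environment = VacuumEnvironment(locations=['A', 'B'])

for step in range(1000):
    # Assumption: randomly dirty one location each step so there is
    # always something to learn from
    dirty_loc = np.random.choice(['A', 'B'])
    environment.update_state(dirty_loc, 'dirty')

    state = list(environment.states)      # snapshot before acting
    action = agent.choose_action(state)   # choose_action avoids act()'s print

    reward = environment.get_reward(dirty_loc, action)
    if action == 'clean':
        environment.update_state(dirty_loc, 'clean')

    agent.learn(state, action, reward, environment.states)

print(agent.q_table)  # inspect the learned action values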

2. Prime Number Learning Agent

This agent applies the same Q-learning update to a prediction task: for each input number it answers is_prime or not_prime and is rewarded when the answer matches the true primality.

Source Code

import numpy as np

class PrimeNumberLearningAgent:
    def __init__(self, actions):
        self.actions = actions
        self.q_table = {}

    def choose_action(self, state):
        if state not in self.q_table:
            self.q_table[state] = {action: 0 for action in self.actions}

        # Exploration-exploitation trade-off (epsilon = 0.2)
        if np.random.uniform(0, 1) < 0.2:
            action = np.random.choice(self.actions)  # Explore
        else:
            action = max(self.q_table[state], key=self.q_table[state].get)  # Exploit

        return action

    def learn(self, state, action, reward, next_state):
        # Initialize both states if unseen (guards against a KeyError when
        # learn() is called for a state choose_action() has not touched)
        if state not in self.q_table:
            self.q_table[state] = {a: 0 for a in self.actions}

        if next_state not in self.q_table:
            self.q_table[next_state] = {a: 0 for a in self.actions}

        # Q-learning update: Q(s,a) += alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))
        alpha, gamma = 0.1, 0.9  # learning rate and discount factor
        best_next = max(self.q_table[next_state].values())
        self.q_table[state][action] += alpha * (reward + gamma * best_next - self.q_table[state][action])

    def act(self, state):
        action = self.choose_action(state)
        print(f"Agent predicts: {action}")
        return action


class PrimeNumberEnvironment:
    def __init__(self):
        pass

    def is_prime(self, number):
        if number < 2:
            return False
        for i in range(2, int(number**0.5) + 1):
            if number % i == 0:
                return False
        return True

    def get_reward(self, number, action):
        if action == 'is_prime':
            return 1 if self.is_prime(number) else -1
        elif action == 'not_prime':
            return 1 if not self.is_prime(number) else -1
        else:
            return 0

# Demonstrate the working of the learning agent for prime numbers
actions = ['is_prime', 'not_prime']
agent = PrimeNumberLearningAgent(actions=actions)
environment = PrimeNumberEnvironment()

for _ in range(10):
    number = int(input("Enter a positive integer: "))
    action = agent.act(number)

    # Provide the agent with a reward and update its Q-values.
    # Each number is treated as its own state with no transition,
    # so next_state is simply the same number (a bandit-style update).
    reward = environment.get_reward(number, action)
    agent.learn(number, action, reward, number)

Output

Enter a positive integer: 1
Agent predicts: is_prime
Enter a positive integer: 2
Agent predicts: is_prime
Enter a positive integer: 3
Agent predicts: not_prime
Enter a positive integer: 4
Agent predicts: is_prime
Enter a positive integer: 5
Agent predicts: is_prime
Enter a positive integer: 6
Agent predicts: is_prime
Enter a positive integer: 7
Agent predicts: is_prime
Enter a positive integer: 8
Agent predicts: not_prime
Enter a positive integer: 9
Agent predicts: is_prime
Enter a positive integer: 10
Agent predicts: is_prime
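
Because the state is the raw integer, the agent cannot generalize across numbers: every integer gets its own independent row in the Q-table, so early predictions (as in the transcript above) are essentially random, and unseen numbers always start from zero-initialized values. A minimal sketch (the 1-20 training range and repetition count are illustrative assumptions, not part of the original program) makes this memorization visible:

agent = PrimeNumberLearningAgent(actions=['is_prime', 'not_prime'])
environment = PrimeNumberEnvironment()

# Train repeatedly on the numbers 1..20 so their Q-values converge
for _ in range(200):
    for number in range(1, 21):
        action = agent.choose_action(number)         # choose_action avoids act()'s print
        reward = environment.get_reward(number, action)
        agent.learn(number, action, reward, number)  # bandit-style: next_state == state

# Greedy predictions are correct for numbers seen during training...
print(max(agent.q_table[7], key=agent.q_table[7].get))   # expected: is_prime
print(max(agent.q_table[8], key=agent.q_table[8].get))   # expected: not_prime

# ...but a number never seen, such as 23, has no Q-table entry at all
print(23 in agent.q_table)                               # False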