Implementing Reactions
Basically, JARVIS can respond to a command in two ways: On the one hand, JARVIS can generate a voice output to respond directly to the user. On the other hand, you can define actions that will be executed when a command is recognized. Of course, both are possible at the same time.
The voice output to be generated and the action to be performed are simply stored in a JSON file. In the following example, the command "who are you" generates the voice response "I am Jarvis..." but no further action, while the command "time" has no voice response, but action 1 is executed.
[
{
"command": "who are you",
"response": "I am Jarvis, an artificial intelligence",
"action": 0
},
{
"command": "time",
"response": "",
"action": 1
}
]
Voice Responses
The class "TextSynthesizer" is responsible for the voice responses and uses the library "pyttsx3" to generate the voice output. Pyttsx3 is a Python text-to-speech conversion library; unlike alternative libraries, it works offline and is compatible with both Python 2 and 3.
#Installation pip install pyttsx3
In the __init__ method, the voice parameter specifies which voice (from the operating system) should be used. "self.synthesizer.setProperty('rate', 150)" sets the speaking speed.
class TextSynthesizer():
    """Turns text into spoken output via the pyttsx3 engine."""

    def __init__(self, voice):
        """Create the engine; *voice* is an index into the OS voice list."""
        self.synthesizer = pyttsx3.init()
        available_voices = self.synthesizer.getProperty('voices')
        self.synthesizer.setProperty('voice', available_voices[voice].id)
        # output rate = speed, higher means faster
        self.synthesizer.setProperty('rate', 150)

    def speak(self, text):
        """Speak *text*, blocking until the utterance has finished."""
        self.synthesizer.say(text)
        self.synthesizer.runAndWait()
        self.synthesizer.stop()
Actions
In the ActionHandler class I have implemented a few examples of actions. Of course, the possible actions depend strongly on the environment on which JARVIS runs. Among other things I have examples for reading out the current time or opening the Google web page.
class Actionhandler:
    """Executes the action belonging to a recognized command code."""

    def __init__(self):
        # voice index 1 of the operating system is used for spoken feedback
        self.synthesizer = TextSynthesizer(1)

    def handleAction(self, action):
        """Perform the action identified by the integer *action* code.

        Code 0 means "no action" (the spoken response alone is enough);
        unknown codes are silently ignored.
        """
        if action == 0:
            pass
        elif action == 1:
            # read the current time
            dt = datetime.datetime.now()
            date = dt.strftime('The time right now is %H hours and %M minutes')
            self.synthesizer.speak(date)
        elif action == 2:
            # read the current date
            dt = datetime.datetime.now()
            date = dt.strftime('The date is %b %d, %Y')
            self.synthesizer.speak(date)
        elif action == 3:
            webbrowser.open("https://www.google.com")
        elif action == 4:
            # get system information
            # BUG FIX: the original wrote  m1 = "..." , platform.system()
            # which builds a *tuple* and hands it to speak(); join the parts
            # into a single string instead so pyttsx3 receives text.
            self.synthesizer.speak("I'm running on " + platform.system())
            self.synthesizer.speak(platform.release())
            self.synthesizer.speak("my IP Adress is " + socket.gethostbyname(socket.gethostname()))
Full Code
# JARVIS
# Simple Voice Recognition
# JARVIS is the main class
# -> CommandMatcher tries to find a matching command from the command list file (commands.json)
# -> ActionHandler is used to perform actions like opening a browser or telling the current time ...
# -> TextSynthesizer is required to generate the voice output
# Stefan Hager, 2023
import json
import sys
import pyaudio
import deepspeech
import numpy as np
from queue import SimpleQueue
from CommandMatcher import CommandMatcher
from TextSynthesizer import TextSynthesizer
from ActionHandler import Actionhandler
class JARVIS:
    """Main voice-assistant class.

    Captures microphone audio with PyAudio, decodes it to text with a
    DeepSpeech model and dispatches recognized commands to the
    CommandMatcher / Actionhandler / TextSynthesizer helpers.
    """

    def __init__(self):
        # TextSynthesizer to generate synthetic speech output
        self.synthesizer = TextSynthesizer(1)
        # Action handler to get some actions done
        self.actionhandler = Actionhandler()
        self.model = deepspeech.Model("deepspeech-0.9.3-models.pbmm")
        self.model.setBeamWidth(512)
        # direct audio processing
        self.audio = pyaudio.PyAudio()
        self.index, name = self.findAudioDevice(self.audio, 'pulse')
        print("selected audio device : ", name)
        # read command file
        try:
            with open('commands.json') as json_file:
                self.commands = json.load(json_file)
        except (OSError, json.JSONDecodeError):
            # narrowed from a bare except: catch only the errors that
            # open()/json.load() actually raise instead of swallowing
            # everything (KeyboardInterrupt, SystemExit, ...)
            print("No command file found ... terminating.")
            sys.exit()

    def waitForCommand(self):
        """Listen on the microphone until a command matches or the listening
        window expires; return the last decoded text."""
        # important: create a new queue to make sure it is empty
        self.buffer_queue = SimpleQueue()
        self.stream = self.model.createStream()
        buffer_size = self.model.sampleRate()
        self.audio_stream = self.audio.open(rate=self.model.sampleRate(),
                                            channels=1,
                                            format=self.audio.get_format_from_width(2, unsigned=False),
                                            input_device_index=self.index,
                                            input=True,
                                            frames_per_buffer=buffer_size,
                                            stream_callback=self.audio_callback)
        iteration_without_match = 0
        # BUG FIX: give text a defined value so the return below cannot raise
        # NameError when the stream never becomes active
        text = ""
        while self.audio_stream.is_active():
            self.stream.feedAudioContent(self.buffer_queue.get())
            text = self.stream.intermediateDecode()
            print(">>", text)
            iteration_without_match += 1
            if self.processCommand(text):
                break
            if iteration_without_match > 10:
                # if there is no match for a longer period -> clear stream and buffer
                # NOTE(review): the counter is never reset, so the stream is
                # recreated after 10 iterations regardless -- confirm intent
                break
        self.stream.finishStream()
        self.audio_stream.close()
        return text

    # callback invoked by PyAudio for each new chunk in the audio stream
    def audio_callback(self, in_data, frame_count, time_info, status_flags):
        self.buffer_queue.put(np.frombuffer(in_data, dtype='int16'))
        return (None, pyaudio.paContinue)

    def findAudioDevice(self, pyaudio, device_name):
        '''Find a specific input device by name or return the default input device.

        NOTE: the parameter name *pyaudio* shadows the module; kept for
        backward compatibility with existing callers.
        '''
        default = pyaudio.get_default_input_device_info()
        for i in range(pyaudio.get_device_count()):
            name = pyaudio.get_device_info_by_index(i)['name']
            if name == device_name:
                return (i, name)
        return (default['index'], default['name'])

    # does the recognized text match any command?
    def processCommand(self, text):
        """Speak the response and run the action of the first matching command."""
        for com in self.commands:
            match = CommandMatcher.matchCommand(com["command"], text)
            if match:
                self.synthesizer.speak(com["response"])
                self.actionhandler.handleAction(int(com["action"]))
                return True
        return False
if __name__ == "__main__":
    # Script entry point: keep listening until the "stop" command is decoded.
    x = JARVIS()
    stop = False
    while not stop:
        text = x.waitForCommand()
        if text == "stop":
            stop = True
    print("finished...")
# JARVIS
# Simple Voice Recognition
# -> CommandMatcher tries to find a matching command from the command list file (commands.json)
# Stefan Hager, 2023
from difflib import SequenceMatcher
class CommandMatcher:
    """Matches decoded speech text against a configured command phrase."""

    @staticmethod
    def matchCommand(command, text):
        """Return True if *command* occurs in *text*.

        A verbatim substring hit always matches; for single-word commands a
        fuzzy per-word comparison (ratio >= 0.75) is tried as a fallback.
        """
        if command in text:
            # direct match
            print(">>> full match : ", command)
            return True
        # no direct match -> fuzzy matching, only supported for one-word commands
        command_words = command.split()
        if len(command_words) == 1:
            target = command_words[0]
            for candidate in text.split():
                similarity = SequenceMatcher(None, candidate, target).ratio()
                if similarity >= 0.75:
                    print(">>> partial match : ", candidate, " r : ", similarity)
                    return True
        return False
# JARVIS
# Simple Voice Recognition
# -> ActionHandler is used to perform actions like opening a browser or telling the current time ...
# Stefan Hager, 2023
import platform
import socket
import webbrowser
from TextSynthesizer import TextSynthesizer
import datetime
import datetime
class Actionhandler:
    """Executes the action belonging to a command code from commands.json."""

    def __init__(self):
        # voice index 1 of the operating system is used for spoken feedback
        self.synthesizer = TextSynthesizer(1)

    def handleAction(self, action):
        """Perform the action identified by the integer *action* code.

        Codes 0-6 are placeholders (0 = no action, 1-6 not yet implemented
        here); code 9 (weather) is intentionally absent; unknown codes are
        ignored.
        """
        if action <= 6:
            # nothing to do for codes 0-6 (original had empty pass branches)
            return
        if action == 7:
            # speak the current time
            dt = datetime.datetime.now()
            date = dt.strftime('The time right now is %H hours and %M minutes')
            self.synthesizer.speak(date)
        elif action == 8:
            # speak the current date
            dt = datetime.datetime.now()
            date = dt.strftime('The date is %b %d, %Y')
            self.synthesizer.speak(date)
        elif action == 10:
            webbrowser.open("https://www.google.com")
        elif action == 11:
            # NOTE(review): opens an empty URL; presumably meant to open the
            # browser's start page -- confirm intent
            webbrowser.open("")
        elif action == 12:
            # speak system information
            # BUG FIX: the original built tuples ("..." , value) and passed
            # them to speak(); concatenate into single strings instead.
            self.synthesizer.speak("I'm running on " + platform.system())
            self.synthesizer.speak(platform.release())
            self.synthesizer.speak("my IP Adress is " + socket.gethostbyname(socket.gethostname()))
# JARVIS
# Simple Voice Recognition
# -> TextSynthesizer is required to generate the voice output
# Stefan Hager, 2023
import pyttsx3
class TextSynthesizer():
    """Wraps a pyttsx3 engine to produce the assistant's voice output."""

    def __init__(self, voice):
        """Set up the engine with the OS voice at index *voice*."""
        self.synthesizer = pyttsx3.init()
        # pick the requested operating-system voice
        os_voices = self.synthesizer.getProperty('voices')
        self.synthesizer.setProperty('voice', os_voices[voice].id)
        # set output rate = speed, higher means faster
        self.synthesizer.setProperty('rate', 150)

    def speak(self, text):
        """Queue *text* on the engine and block until it has been spoken."""
        self.synthesizer.say(text)
        self.synthesizer.runAndWait()
        self.synthesizer.stop()
[
{
"command": "who are you",
"response": "I am Jarvis, an artificial intelligence",
"action": 0
},
{
"command": "how are you",
"response": "I am fine, up and running",
"action": 0
},
{
"command": "hello",
"response": "Hi, nice to meet you",
"action": 0
},
{
"command": "master",
"response": "my master is Stefan Hager, the greatest of all times",
"action": 0
},
{
"command": "stop",
"response": "Goodbye",
"action": 1
},
{
"command": "lights on",
"response": "Copy, switching lights on",
"action": 2
},
{
"command": "lights of",
"response": "Copy, switching lights off",
"action": 3
},
{
"command": "power on",
"response": "Copy, power on",
"action": 4
},
{
"command": "power of",
"response": "Copy, power off",
"action": 5
},
{
"command": "message",
"response": "Engines armed, power on, ready for takeoff",
"action": 0
},
{
"command": "status",
"response": "Up and running",
"action": 0
},
{
"command": "time",
"response": "",
"action": 7
},
{
"command": "date",
"response": "",
"action": 8
},
{
"command": "weather",
"response": "Weather forecast is not yet implemented",
"action": 9
},
{
"command": "google",
"response": "Copy, start google",
"action": 10
},
{
"command": "browser",
"response": "Copy, opening browser",
"action": 11
},
{
"command": "system",
"response": "",
"action": 12
}
]