python - graficar - Detecta el tap con pyaudio desde el micrófono en vivo

graficar en python (2)

Una forma en que lo hice:

lee un bloque de muestras a la vez, digamos 0.05 segundos
calcular la amplitud RMS del bloque (raíz cuadrada del promedio de los cuadrados de las muestras individuales)
si la amplitud RMS del bloque es mayor que un umbral, es un "bloque ruidoso"; de lo contrario, es un "bloque silencioso"
un toque repentino sería un bloque silencioso seguido de un pequeño número de bloques ruidosos seguido de un bloque silencioso
si nunca obtienes un bloque silencioso, tu umbral es demasiado bajo
si nunca obtienes un bloque ruidoso, tu umbral es demasiado alto

Mi aplicación grababa ruidos "interesantes" sin supervisión, por lo que seguía grabando mientras hubiera bloques ruidosos. Multiplicaba el umbral por 1.1 si había un período ruidoso de 15 segundos ("tapándose los oídos") y lo multiplicaba por 0.9 si había un período de silencio de 15 minutos ("escuchando con más atención"). Tu aplicación tendrá necesidades diferentes.

Además, acabo de notar algunos comentarios en mi código con respecto a los valores de RMS observados. Con el micrófono incorporado de una MacBook Pro, los datos de audio normalizados al rango ±1.0 y el volumen de entrada configurado al máximo, estos son algunos puntos de datos:

0.003-0.006 (-50dB a -44dB) un ventilador de calefacción central desagradablemente ruidoso en mi casa
0.010-0.40 (-40dB a -8dB) escribiendo en la misma computadora portátil
0.10 (-20dB) chasqueando los dedos suavemente a 1" de distancia
0.60 (-4.4dB) chasqueando los dedos ruidosamente a 1" de distancia

Actualización: aquí hay una muestra para que comiences.

#!/usr/bin/python
"""Open a microphone with PyAudio and listen for taps.

Audio is read in short blocks. A block whose RMS amplitude exceeds the
current threshold is "noisy", otherwise it is "quiet". A tap is a short
run of noisy blocks (at most MAX_TAP_BLOCKS) followed by a quiet block.
The threshold is raised while the input stays noisy for too long and
lowered while it stays quiet for too long.
"""

import math
import struct

import pyaudio

INITIAL_TAP_THRESHOLD = 0.010
FORMAT = pyaudio.paInt16
SHORT_NORMALIZE = (1.0 / 32768.0)
CHANNELS = 2
RATE = 44100
INPUT_BLOCK_TIME = 0.05
INPUT_FRAMES_PER_BLOCK = int(RATE * INPUT_BLOCK_TIME)
# if we get this many noisy blocks in a row, increase the threshold
OVERSENSITIVE = 15.0 / INPUT_BLOCK_TIME
# if we get this many quiet blocks in a row, decrease the threshold
UNDERSENSITIVE = 120.0 / INPUT_BLOCK_TIME
# if the noise was longer than this many blocks, it's not a 'tap'
MAX_TAP_BLOCKS = 0.15 / INPUT_BLOCK_TIME


def get_rms(block):
    """Return the RMS amplitude of a block of raw 16-bit samples.

    RMS amplitude is the square root of the mean of the squared samples.
    Each sample is scaled by SHORT_NORMALIZE before squaring, so the
    result is relative to a +/- 1.0 full-scale range.

    Args:
        block: raw bytes holding native-endian signed 16-bit samples
            (two bytes per sample).

    Returns:
        The RMS amplitude as a float; 0.0 when the block holds no
        complete sample.

    Raises:
        struct.error: if a non-empty block has an odd number of bytes.
    """
    # One short comes out for every two bytes. Floor division keeps the
    # count an int on Python 3 as well as Python 2.
    count = len(block) // 2
    if count == 0:
        # Nothing to average; avoid dividing by zero below.
        return 0.0

    shorts = struct.unpack("%dh" % count, block)

    sum_squares = 0.0
    for sample in shorts:
        # sample is a signed short; normalize it to +/- 1.0
        n = sample * SHORT_NORMALIZE
        sum_squares += n * n

    return math.sqrt(sum_squares / count)


class TapTester(object):
    """Detect taps on a live microphone stream, one block per listen()."""

    def __init__(self):
        self.pa = pyaudio.PyAudio()
        self.stream = self.open_mic_stream()
        self.tap_threshold = INITIAL_TAP_THRESHOLD
        # Start above MAX_TAP_BLOCKS so the first quiet block cannot be
        # reported as the end of a tap.
        self.noisycount = MAX_TAP_BLOCKS + 1
        self.quietcount = 0
        self.errorcount = 0

    def stop(self):
        """Close the input stream and release PyAudio."""
        self.stream.close()
        self.pa.terminate()

    def find_input_device(self):
        """Return the index of the first device that looks like an input.

        Every device is printed as it is inspected. A device matches when
        its name contains "mic" or "input" (case-insensitive).

        Returns:
            The matching device index, or None when no name matches, in
            which case the caller falls back to the default input device.
        """
        for i in range(self.pa.get_device_count()):
            devinfo = self.pa.get_device_info_by_index(i)
            print("Device %d: %s" % (i, devinfo["name"]))

            for keyword in ["mic", "input"]:
                if keyword in devinfo["name"].lower():
                    print("Found an input: device %d - %s"
                          % (i, devinfo["name"]))
                    return i

        # The loop returns on the first match, so reaching this point
        # means no device name matched.
        print("No preferred input found; using default input device.")
        return None

    def open_mic_stream(self):
        """Open and return an input stream on the preferred device."""
        device_index = self.find_input_device()

        stream = self.pa.open(format=FORMAT,
                              channels=CHANNELS,
                              rate=RATE,
                              input=True,
                              input_device_index=device_index,
                              frames_per_buffer=INPUT_FRAMES_PER_BLOCK)

        return stream

    def tapDetected(self):
        """Report a detected tap."""
        print("Tap!")

    def listen(self):
        """Read one block from the stream and update the tap detector.

        Calls tapDetected() when a quiet block follows a run of 1 to
        MAX_TAP_BLOCKS noisy blocks. A read error is counted and printed,
        and no amplitude is computed for that call.
        """
        try:
            block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
        except IOError as e:
            self.errorcount += 1
            print("(%d) Error recording: %s" % (self.errorcount, e))
            self.noisycount = 1
            return

        amplitude = get_rms(block)
        if amplitude > self.tap_threshold:
            # noisy block
            self.quietcount = 0
            self.noisycount += 1
            if self.noisycount > OVERSENSITIVE:
                # turn down the sensitivity
                self.tap_threshold *= 1.1
        else:
            # quiet block
            if 1 <= self.noisycount <= MAX_TAP_BLOCKS:
                self.tapDetected()
            self.noisycount = 0
            self.quietcount += 1
            if self.quietcount > UNDERSENSITIVE:
                # turn up the sensitivity
                self.tap_threshold *= 0.9


if __name__ == "__main__":
    tt = TapTester()
    try:
        for i in range(1000):
            tt.listen()
    finally:
        # Release the audio device even if listening is interrupted.
        tt.stop()

¿Cómo usaría pyaudio para detectar un ruido de tapping repentino de un micrófono en vivo?

una versión simplificada del código anterior ...

"""Simplified tap detector: read microphone blocks and print taps.

A block whose RMS amplitude exceeds the threshold is "noisy"; a short run
of noisy blocks followed by a quiet block is reported as a tap.
"""

import math
import struct

import pyaudio

INITIAL_TAP_THRESHOLD = 0.010
FORMAT = pyaudio.paInt16
SHORT_NORMALIZE = (1.0 / 32768.0)
CHANNELS = 2
RATE = 44100
INPUT_BLOCK_TIME = 0.05
INPUT_FRAMES_PER_BLOCK = int(RATE * INPUT_BLOCK_TIME)
# if we get this many noisy blocks in a row, increase the threshold
OVERSENSITIVE = 15.0 / INPUT_BLOCK_TIME
# if we get this many quiet blocks in a row, decrease the threshold
UNDERSENSITIVE = 120.0 / INPUT_BLOCK_TIME
# if the noise was longer than this many blocks, it's not a 'tap'
MAX_TAP_BLOCKS = 0.15 / INPUT_BLOCK_TIME


def get_rms(block):
    """Return the RMS amplitude of a block of raw 16-bit samples.

    RMS amplitude is the square root of the mean of the squared samples.
    Each sample is scaled by SHORT_NORMALIZE before squaring, so the
    result is relative to a +/- 1.0 full-scale range.

    Args:
        block: raw bytes holding native-endian signed 16-bit samples
            (two bytes per sample).

    Returns:
        The RMS amplitude as a float; 0.0 when the block holds no
        complete sample.

    Raises:
        struct.error: if a non-empty block has an odd number of bytes.
    """
    # One short comes out for every two bytes. Floor division keeps the
    # count an int on Python 3 as well as Python 2.
    count = len(block) // 2
    if count == 0:
        # Nothing to average; avoid dividing by zero below.
        return 0.0

    shorts = struct.unpack("%dh" % count, block)

    sum_squares = 0.0
    for sample in shorts:
        # sample is a signed short; normalize it to +/- 1.0
        n = sample * SHORT_NORMALIZE
        sum_squares += n * n

    return math.sqrt(sum_squares / count)


# Standard PyAudio setup: open an input stream on the default device.
pa = pyaudio.PyAudio()
stream = pa.open(format=FORMAT,
                 channels=CHANNELS,
                 rate=RATE,
                 input=True,
                 frames_per_buffer=INPUT_FRAMES_PER_BLOCK)

# State for the noise detector. noisycount starts above MAX_TAP_BLOCKS so
# the first quiet block cannot be reported as the end of a tap.
tap_threshold = INITIAL_TAP_THRESHOLD
noisycount = MAX_TAP_BLOCKS + 1
quietcount = 0
errorcount = 0

try:
    for i in range(1000):
        try:
            block = stream.read(INPUT_FRAMES_PER_BLOCK)
        except IOError as e:
            errorcount += 1
            print("(%d) Error recording: %s" % (errorcount, e))
            noisycount = 1
            # No block was read; skip analysis instead of using an
            # undefined or stale block.
            continue

        amplitude = get_rms(block)
        if amplitude > tap_threshold:
            # noisy block
            quietcount = 0
            noisycount += 1
            if noisycount > OVERSENSITIVE:
                # turn down the sensitivity
                tap_threshold *= 1.1
        else:
            # quiet block
            if 1 <= noisycount <= MAX_TAP_BLOCKS:
                print('tap!')
            noisycount = 0
            quietcount += 1
            if quietcount > UNDERSENSITIVE:
                # turn up the sensitivity
                tap_threshold *= 0.9
finally:
    # Release the audio device even if the loop is interrupted.
    stream.close()
    pa.terminate()