%-------------------------------------------------------------------------------
% File Name   : epdone.m
% Description : Detects the onset and offset of speech in a single input wave
%               file using short-time energy with a lower (ITL) and an upper
%               (ITU) threshold.
%
% Inputs      : The wave file name is read interactively from the prompt.
% Outputs     : Prints the speech onset/offset in milliseconds and plots the
%               normalized signal (with the detected end points) together with
%               the per-frame energy.
% Requires    : butter, hamming (Signal Processing Toolbox or equivalent).
%-------------------------------------------------------------------------------
% Author      : Prateek Bansal
%-------------------------------------------------------------------------------
clear all;
close all;

% ----------------------------- Configuration ---------------------------------
window_size = 10;    % Analysis frame length, in milliseconds
end_silence = 100;   % Leading span assumed to be silence, in milliseconds
back_off    = 250;   % In milliseconds (not used by the detection below)

% ------------------------------ Input file -----------------------------------
FILE  = input(' Name of the Wave file to be processed(.wav):','s');
check = exist(FILE,'file');
if check ~= 2
    fprintf('!! The file is not present in the current directory ! \n');
    return;
end

% Read the wave file. wavread is not available in current MATLAB releases, so
% prefer audioread when it exists and fall back to wavread otherwise.
if exist('audioread', 'file')
    [input_sig, Fs] = audioread(FILE);
else
    [input_sig, Fs] = wavread(FILE);
end

% The frame loop below multiplies each frame by a column window, so work on a
% single channel (the first one) when the file is multi-channel.
input_sig = input_sig(:, 1);

% Normalize to a peak magnitude of 1. Use the absolute peak so that signals
% whose largest excursion is negative are scaled correctly, and skip the
% division for an all-zero signal (it would produce NaN).
peak_value = max(abs(input_sig));
if peak_value > 0
    input_sig = input_sig / peak_value;
end

% ----------------------------- Filter design ---------------------------------
% High-pass edge at 100 Hz to remove DC content and AC hum
W1 = 100 * (2/Fs);
% Low-pass edge at 4 kHz
% NOTE(review): butter needs W2 < 1, i.e. Fs > 8 kHz - confirm expected inputs.
W2 = (4*10^3) * (2/Fs);
% Design the band-pass filter
[B, A] = butter(10, [W1, W2]);

% The band-pass filter is applied frame by frame inside the energy loop; the
% signal kept here is the normalized input itself.
filt_inpsig   = input_sig;
signal_length = length(filt_inpsig);

% Sample-index axis used for plotting
time_t = 0 : 1 : (signal_length-1);

% ---------------------------- Frame energies ---------------------------------
% Window size in samples
win_samples = round((Fs*window_size)/1000);
% Hamming window applied to every frame
ham_window  = hamming(win_samples);

% Start index of every non-overlapping frame
frame_starts = 1 : win_samples : signal_length - win_samples;
if isempty(frame_starts)
    fprintf('!! The signal is too short for a %d msec analysis window ! \n', window_size);
    return;
end

% Per-frame energy: sum of magnitudes of the windowed, band-pass filtered frame
energy = zeros(1, numel(frame_starts));
for j = 1 : numel(frame_starts)
    first_sample = frame_starts(j);
    frame        = filt_inpsig(first_sample : first_sample + win_samples - 1);
    frame        = abs(filter(B, A, frame .* ham_window));
    energy(j)    = sum(frame);
end

% End of the initial silence span, in samples
end_silence_sample = round((end_silence*Fs)/1000);
% Frame indices covering the initial silence span
silenceRange = 1:length(1:win_samples:end_silence_sample - win_samples);

% ------------------------------ Thresholds -----------------------------------
% Alternative scheme (disabled): IMN (silence energy) is the average energy of
% the initial 100 ms of the signal.
% IMX=max(energy);                  % Peak energy
% IMN=mean(energy(silenceRange));   % Silence energy
% I1=0.03*(IMX-IMN)+IMN;
% I2=4*IMN;
% ITL=min(I1,I2);                   % Lower threshold
ITL = mean(energy) * 0.2;           % Lower threshold
ITU = 5*ITL;                        % Upper threshold

% ------------------------ Determination of end points ------------------------
N1 = 0;   % Start point initial estimate (frame index)
N2 = 0;   % End point initial estimate (frame index)
duration = length(energy);

% Start point: scan forward for a frame at or above ITL, then look ahead; the
% candidate is accepted if the energy reaches ITU before dropping below ITL.
done = 0;
for m = 1:duration
    if energy(m) >= ITL && ~done
        for i = m+2:duration
            if energy(i) < ITL
                break
            else
                if energy(i) >= ITU
                    if ~done
                        N1   = i - (i == m);
                        done = 1;
                    end
                    break
                end
            end
        end
    end
end

% End point: same procedure, scanning backward from the end of the signal.
done = 0;
for m = duration:-1:1
    if energy(m) >= ITL && ~done
        for i = m-2:-1:1
            if energy(i) < ITL
                break
            else
                if energy(i) >= ITU
                    if ~done
                        N2   = i + (i == m);
                        done = 1;
                    end
                    break
                end
            end
        end
    end
end

% Convert frame indices to sample indices
warpRatio = round(length(filt_inpsig)/length(energy));
N1_w = N1*warpRatio;
N2_w = N2*warpRatio;

% --------------------------------- Plots -------------------------------------
p = 1;   % Set to 0 to disable plotting
if p
    subplot(2,1,1);
    plot(time_t, input_sig);
    hold on;
    line([N1_w,N1_w], [-1,1], 'Color', [1 0 1]);
    line([N2_w,N2_w], [-1,1], 'Color', [1 0 1]);
    title(' End Points of the Speech Signal');
    subplot(2,1,2);
    plot(1:length(energy), energy);
    title(' Energy of the signal');
end

% ------------------------ Speech onset / offset ------------------------------
% Convert the sample indices to milliseconds
Speech_on  = round((N1_w*1000)/Fs);
Speech_off = round((N2_w*1000)/Fs);
% Pull the onset 5% earlier
Speech_on  = round(Speech_on - 0.05*Speech_on);

fprintf(1,'\n Speech Onset for %s file : %d msec \n', FILE, Speech_on );
fprintf(1,' Speech offset for %s file : %d msec\n', FILE, Speech_off);