CLAM-Development  1.4.0
AudioDescriptors.cxx
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2001-2004 MUSIC TECHNOLOGY GROUP (MTG)
3  * UNIVERSITAT POMPEU FABRA
4  *
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  */
21 
22 
23 
24 #include <cmath>
25 #include "AudioDescriptors.hxx"
26 #include "Audio.hxx"
27 #include "OSDefines.hxx"
28 
29 namespace CLAM {
30 
31 
// Floor value used before taking log10() in this file: ComputeLogAttackTime()
// substitutes it for a zero attack time and ComputeDecrease() clamps the
// envelope to it, so the logarithm always receives a positive argument.
32 const TData AudioDescriptors::mEpsilon = 1e-5;
33 
35 {
// NOTE(review): the signature line that precedes this body is missing from
// this listing. The body runs MandatoryInit() and then stores the pAudio
// pointer in mpAudio; the pointed-to audio is not copied here.
36  MandatoryInit();
37  mpAudio=pAudio;
38 }
39 
41 {
// NOTE(review): the signature line that precedes this body is missing from
// this listing. After MandatoryInit(), AddAll() and UpdateData() are called
// and then every one of the eight descriptors is set to the same initVal.
42  MandatoryInit();
43  AddAll();
44  UpdateData();
45  SetMean(initVal);
46  SetTemporalCentroid(initVal);
47  SetEnergy(initVal);
48  SetVariance(initVal);
49  SetZeroCrossingRate(initVal);
50  SetRiseTime(initVal);
51  SetLogAttackTime(initVal);
52  SetDecrease(initVal);
53 }
54 
55 void AudioDescriptors::DefaultInit() {
56  mpAudio=0;
57 }
58 
59 
60 void AudioDescriptors::CopyInit(const AudioDescriptors & copied) {
61  mpAudio=copied.mpAudio;
62 }
63 
// NOTE(review): the signature line of this accessor is missing from this
// listing. The body hands back the stored audio pointer unchanged.
65  return mpAudio;
66 }
67 
68 
69 
// NOTE(review): the signature line of this setter is missing from this
// listing. The body attaches the given audio object, points the statistics
// at its sample buffer and drops any cached attack time.
71  mpAudio=pAudio;
72  //TODO: it may give problems because pointer passed
// InitStats receives the address of the audio's buffer, not a copy of it.
73  InitStats(&mpAudio->GetBuffer());
// Forces ComputeAttackTime() to recompute instead of returning the value
// cached in mComputedAttackTime.
74  mIsAttackTimeComputed=false;
75 }
76 
78 {
// NOTE(review): the signature line that precedes this body is missing from
// this listing. Each descriptor is only computed and stored when the matching
// Has...() check succeeds.
// Mean, centroid, energy and variance are read from mpStats.
79  if (HasMean())
80  SetMean(mpStats->GetMean());
81  if (HasTemporalCentroid())
// The centroid reported by mpStats is divided by the sample rate before it
// is stored.
82  SetTemporalCentroid(mpStats->GetCentroid()/mpAudio->GetSampleRate());
83  if (HasEnergy())
84  SetEnergy(mpStats->GetEnergy());
85  if(HasVariance())
86  SetVariance(mpStats->GetVariance());
// The remaining descriptors come from the Compute...() methods of this class.
87  if(HasZeroCrossingRate())
88  SetZeroCrossingRate(ComputeZeroCrossingRate());
89  if(HasRiseTime())
// The rise time descriptor is filled with the result of ComputeAttackTime().
90  SetRiseTime(ComputeAttackTime());
91  if(HasLogAttackTime())
92  SetLogAttackTime(ComputeLogAttackTime());
93  if(HasDecrease())
94  SetDecrease(ComputeDecrease());
95 }
96 
97 TData AudioDescriptors::ComputeZeroCrossingRate()
98 {
99  DataArray& data = mpAudio->GetBuffer();
100 
101  int signChangeCount = 0;
102  const TSize size = data.Size();
103  bool wasPositive = data[0] > 0.0;
104 
105  for (int i=1; i<size; i++)
106  {
107  const bool isPositive = (data[i] > 0.0);
108  if (wasPositive == isPositive) continue;
109 
110  signChangeCount++;
111  wasPositive = isPositive;
112  }
113  // Average
114  return ((TData)signChangeCount)/size;
115 }
116 
117 TData AudioDescriptors::ComputeAttackTime()
118 {
119  if(mIsAttackTimeComputed) return mComputedAttackTime;
120 
121  const DataArray& data = mpAudio->GetBuffer();
122  const TSize dataSize = mpAudio->GetSize();
123 
124  DataArray energyEnv;
125  energyEnv.Resize(dataSize);
126  energyEnv.SetSize(dataSize);
127 
128  // Compute 20Hz lowpass filter coefficients
129  const TData omega_c = 2*PI*20/mpAudio->GetSampleRate();
130  const TData alpha = (1-sin(omega_c)) / cos(omega_c);
131 
132  const TData b0 = (1-alpha)/2;
133  const TData a1 = -alpha;
134 
135  // Find maximum value
136  energyEnv[0] = b0*CLAM::Abs(data[0]);
137  TData maxVal = energyEnv[0];
138 
139  for (TIndex i=1; i<dataSize; i++) {
140  energyEnv[i] = b0*(CLAM::Abs(data[i]) + CLAM::Abs(data[i-1])) - a1*energyEnv[i-1];
141  if (energyEnv[i] > maxVal) maxVal = energyEnv[i];
142  }
143 
144  // Locate start and stop of attack
145  const TData startThreshold = 0.02*maxVal;
146  const TData stopThreshold = 0.80*maxVal;
147 
148  TIndex startIdx;
149  for (startIdx=0; startIdx<dataSize; startIdx++) {
150  if (energyEnv[startIdx] > startThreshold) break;
151  }
152 
153  TIndex stopIdx;
154  for (stopIdx=startIdx; stopIdx<dataSize; stopIdx++) {
155  if (energyEnv[stopIdx] > stopThreshold) break;
156  }
157 
158  mComputedAttackTime=(stopIdx - startIdx) / mpAudio->GetSampleRate();
159  mIsAttackTimeComputed=true;
160  return mComputedAttackTime;
161 }
162 
163 
164 TData AudioDescriptors::ComputeLogAttackTime()
165 {
166  ComputeAttackTime();
167  if (mComputedAttackTime==0)
168  return log10(mEpsilon);
169  return log10(mComputedAttackTime);
170 }
171 
172 
173 TData AudioDescriptors::ComputeDecrease()
174 {
175  const DataArray& data = mpAudio->GetBuffer();
176  const TSize dataSize = mpAudio->GetSize();
177 
178  // Compute 20Hz lowpass filter coefficients
179  const double omega_c = 2*PI*20/mpAudio->GetSampleRate();
180  const double alpha = (1-sin(omega_c)) / cos(omega_c);
181 
182  const double b0 = (1-alpha)/2;
183  const double a1 = -alpha;
184 
185  // Find maximum value
186  double y = b0*CLAM::Abs(data[0]);
187  TData correctedY = y<mEpsilon ? mEpsilon : y;
188  double logEnv = log10(correctedY);
189 
190  TData maxVal = logEnv;
191  TSize maxIdx = 0;
192  double sumXX = 0;
193  double sumY = 0;
194  double sumXY = 0;
195 
196  for (TIndex i=1; i<dataSize; i++)
197  {
198  y = b0*(CLAM::Abs(data[i-1]) + CLAM::Abs(data[i])) - a1*y;
199  correctedY = y<mEpsilon ? mEpsilon : y;
200  const double logEnv = log10(correctedY);
201 
202  if (logEnv > maxVal)
203  {
204  maxVal = logEnv;
205  maxIdx = i;
206  sumXX = 0;
207  sumY = 0;
208  sumXY = 0;
209  }
210  sumY += logEnv;
211  sumXY += i*logEnv;
212  sumXX += i*i;
213  }
214 
215  // Compute means and gradient of decay part
216  const long N = dataSize - maxIdx;
217  TData sumX = N*(N + 2*maxIdx - 1)/2;
218 
219  TData num = N * sumXY - sumX * sumY;
220  TData den = N * sumXX - sumX * sumX;
221 
222  return (num / den) * mpAudio->GetSampleRate();
223 }
224 
225 
227 {
// NOTE(review): the signature line that precedes this body is missing from
// this listing. The body scales a set of descriptors by the factor mult.
228 
// Start from a copy of a, then overwrite every descriptor that a actually
// holds with its value times mult.
229  AudioDescriptors tmpD(a);
230 
231  if (a.HasMean())
232  {
233  tmpD.SetMean(a.GetMean()*mult);
234  }
235  if (a.HasTemporalCentroid())
236  {
237  tmpD.SetTemporalCentroid(a.GetTemporalCentroid()*mult);
238  }
239  if (a.HasEnergy())
240  {
241  tmpD.SetEnergy(a.GetEnergy()*mult);
242  }
243  if(a.HasVariance())
244  {
245  tmpD.SetVariance(a.GetVariance()*mult);
246  }
247  if(a.HasZeroCrossingRate())
248  {
249  tmpD.SetZeroCrossingRate(a.GetZeroCrossingRate()*mult);
250  }
251  if(a.HasRiseTime())
252  {
253  tmpD.SetRiseTime(a.GetRiseTime()*mult);
254  }
255  if(a.HasLogAttackTime())
256  {
257  tmpD.SetLogAttackTime(a.GetLogAttackTime()*mult);
258  }
259  if(a.HasDecrease())
260  {
261  tmpD.SetDecrease(a.GetDecrease()*mult);
262  }
263  return tmpD;
264 }
265 
267 {
// NOTE(review): the signature line that precedes this body is missing from
// this listing. The body simply delegates to the a*mult form.
268  return a*mult;
269 }
270 
272 {
// NOTE(review): the signature line that precedes this body is missing from
// this listing. The body multiplies two sets of descriptors attribute by
// attribute.
// The result starts out default-constructed; a descriptor is added to it
// (Add...() followed by UpdateData()) only when both a and b hold it, and is
// then set to the product of the two values.
273  AudioDescriptors tmpD;
274 
275  if (a.HasMean() && b.HasMean() )
276  {
277  tmpD.AddMean();
278  tmpD.UpdateData();
279  tmpD.SetMean(a.GetMean()*b.GetMean() );
280  }
281  if (a.HasTemporalCentroid() && b.HasTemporalCentroid() )
282  {
283  tmpD.AddTemporalCentroid();
284  tmpD.UpdateData();
285  tmpD.SetTemporalCentroid(a.GetTemporalCentroid()*b.GetTemporalCentroid() );
286  }
287  if (a.HasEnergy() && b.HasEnergy() )
288  {
289  tmpD.AddEnergy();
290  tmpD.UpdateData();
291  tmpD.SetEnergy(a.GetEnergy()*b.GetEnergy() );
292  }
293  if(a.HasVariance() && b.HasVariance() )
294  {
295  tmpD.AddVariance();
296  tmpD.UpdateData();
297  tmpD.SetVariance(a.GetVariance()*b.GetVariance() );
298  }
299  if(a.HasZeroCrossingRate() && b.HasZeroCrossingRate() )
300  {
301  tmpD.AddZeroCrossingRate();
302  tmpD.UpdateData();
303  tmpD.SetZeroCrossingRate(a.GetZeroCrossingRate()*b.GetZeroCrossingRate() );
304  }
305  if(a.HasRiseTime() && b.HasRiseTime() )
306  {
307  tmpD.AddRiseTime();
308  tmpD.UpdateData();
309  tmpD.SetRiseTime(a.GetRiseTime()*b.GetRiseTime() );
310  }
311  if(a.HasLogAttackTime() && b.HasLogAttackTime() )
312  {
313  tmpD.AddLogAttackTime();
314  tmpD.UpdateData();
315  tmpD.SetLogAttackTime(a.GetLogAttackTime()*b.GetLogAttackTime() );
316  }
317  if(a.HasDecrease() && b.HasDecrease() )
318  {
319  tmpD.AddDecrease();
320  tmpD.UpdateData();
321  tmpD.SetDecrease(a.GetDecrease()*b.GetDecrease() );
322  }
323  return tmpD;
324 }
325 
327 {
// NOTE(review): the signature line that precedes this body is missing from
// this listing. The body adds two sets of descriptors attribute by attribute.
// The result starts out default-constructed; a descriptor is added to it
// (Add...() followed by UpdateData()) only when both a and b hold it, and is
// then set to the sum of the two values.
328  AudioDescriptors tmpD;
329 
330  if (a.HasMean() && b.HasMean() )
331  {
332  tmpD.AddMean();
333  tmpD.UpdateData();
334  tmpD.SetMean(a.GetMean()+b.GetMean() );
335  }
336  if (a.HasTemporalCentroid() && b.HasTemporalCentroid() )
337  {
338  tmpD.AddTemporalCentroid();
339  tmpD.UpdateData();
340  tmpD.SetTemporalCentroid(a.GetTemporalCentroid()+b.GetTemporalCentroid() );
341  }
342  if (a.HasEnergy() && b.HasEnergy() )
343  {
344  tmpD.AddEnergy();
345  tmpD.UpdateData();
346  tmpD.SetEnergy(a.GetEnergy()+b.GetEnergy() );
347  }
348  if(a.HasVariance() && b.HasVariance() )
349  {
350  tmpD.AddVariance();
351  tmpD.UpdateData();
352  tmpD.SetVariance(a.GetVariance()+b.GetVariance() );
353  }
354  if(a.HasZeroCrossingRate() && b.HasZeroCrossingRate() )
355  {
356  tmpD.AddZeroCrossingRate();
357  tmpD.UpdateData();
358  tmpD.SetZeroCrossingRate(a.GetZeroCrossingRate()+b.GetZeroCrossingRate() );
359  }
360  if(a.HasRiseTime() && b.HasRiseTime() )
361  {
362  tmpD.AddRiseTime();
363  tmpD.UpdateData();
364  tmpD.SetRiseTime(a.GetRiseTime()+b.GetRiseTime() );
365  }
366  if(a.HasLogAttackTime() && b.HasLogAttackTime() )
367  {
368  tmpD.AddLogAttackTime();
369  tmpD.UpdateData();
370  tmpD.SetLogAttackTime(a.GetLogAttackTime()+b.GetLogAttackTime() );
371  }
372  if(a.HasDecrease() && b.HasDecrease() )
373  {
374  tmpD.AddDecrease();
375  tmpD.UpdateData();
376  tmpD.SetDecrease(a.GetDecrease()+b.GetDecrease() );
377  }
378  return tmpD;
379 
380 }
381 
383 {
// NOTE(review): the signature line that precedes this body is missing from
// this listing. The difference is built from the existing operators: b is
// scaled by -1 and the result is added to a.
384  return a+((-1)*b);
385 }
386 
388 {
// NOTE(review): the signature line that precedes this body is missing from
// this listing. The division is expressed as a multiplication by 1/div.
// NOTE(review): there is no check for div being zero, and 1/div would
// truncate if div were of an integral type - confirm against the declaration.
389  return a*(1/div);
390 }
391 
392 }
393