CLAM-Development  1.4.0
Normalization.cxx
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2001-2004 MUSIC TECHNOLOGY GROUP (MTG)
3  * UNIVERSITAT POMPEU FABRA
4  *
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  */
21 #include "DataTypes.hxx"
22 #include "CLAM_Math.hxx"
23 #include "Normalization.hxx"
24 #include "Stats.hxx"
25 
26 
27 
28 namespace CLAM
29 {
30 
31 
// NOTE(review): the signature line for this body is missing from this listing;
// presumably this is the configuration's default-initialisation routine -- confirm.
// The body activates the Type and FrameSize attributes, calls UpdateData(),
// and then assigns their default values.
33  {
34  AddType();
35  AddFrameSize();
36  UpdateData();
// Default type 1; Normalization::Do() treats type 1 as "normalize by max energy".
37  SetType(1);
// Default frame length in samples (2205 samples = 0.05 s at 44.1 kHz).
38  SetFrameSize(2205);//0.05s at 44.1k
39 
40  }
41 
42 
// NOTE(review): the signature line and one body line of this constructor are
// missing from this listing. What is visible: the member mIsSilenceCtrl is
// initialised with the name "Silence" and a pointer to this object.
44  : mIsSilenceCtrl( "Silence", this )
45  {
47  }
48 
// NOTE(review): the signature line of this constructor is missing from this
// listing; presumably it takes the configuration object `c` -- confirm.
// Initialises mIsSilenceCtrl with the name "Silence" and this object, then
// applies the supplied configuration through Configure().
50  : mIsSilenceCtrl( "Silence", this )
51  {
52  Configure(c);
53  }
54 
56 
57 
58  bool Normalization::ConcreteConfigure(const ProcessingConfig& c)
59  {
60  CopyAsConcreteConfig(mConfig,c);
61 
62  mType=mConfig.GetType();
63  mFrameSize=mConfig.GetFrameSize();
64 
65  return true;
66  }
67 
68  bool Normalization::Do(void)
69  {
70  return false;
71  }
72 
// NOTE(review): the signature line for this body is missing from this listing;
// judging by the uses of `in` below it is the in-place overload that receives
// a mutable audio object named `in` -- confirm.
//
// Computes a scale factor from the audio buffer according to mType and then
// multiplies every sample of that same buffer by the inverse of the factor.
// Always returns true.
74  {
75  TData scaleFactor = 0;
76 
77  //Type #1: normalizes according to the max energy
78  //Type #2: normalizes according to the average energy
79  //Type #3: normalizes according to the threshold under which lies percent% of
80  //the energy values that are not silence
81 
82  if ( mType == 1 )
83  scaleFactor = ComputeScaleFactorFromMaxEnergy( in.GetBuffer() );
84  else if ( mType == 2 )
85  scaleFactor = ComputeScaleFactorFromAvgEnergy( in.GetBuffer() );
86  else if ( mType == 3 )
87  scaleFactor = ComputeScaleFactorFromDominantEnergy( in.GetBuffer() );
88 
// NOTE(review): if mType is none of 1, 2 or 3, scaleFactor is still 0 here and
// the division below is a division by zero; the buffer would then be scaled
// by a non-finite value. Confirm that mType is validated elsewhere.
89  const TData invScaleFactor = 1.0 / scaleFactor;
90  DataArray& inBufferSamples = in.GetBuffer();
91 
// Scale the samples in place.
92  for (int n=0; n<in.GetSize(); n++)
93  inBufferSamples[n]*=invScaleFactor;
94 
95  return true;
96  }
97 
98 
99  bool Normalization::Do(const Audio& unnorm, Audio& norm)
100  {
101  CLAM_ASSERT( unnorm.GetSize() == norm.GetSize(),
102  "Normalization::Do() : input and output audio sizes must match" );
103 
104  TData scaleFactor = 0;
105 
106  //Type #1: normalizes according to the max energy
107  //Type #2: normalizes according to the average energy
108  //Type #3: normalizes according to the threshold under which lies percent% of
109  //the energy values that are not silence
110 
111  if ( mType == 1 )
112  scaleFactor = ComputeScaleFactorFromMaxEnergy( unnorm.GetBuffer() );
113  else if ( mType == 2 )
114  scaleFactor = ComputeScaleFactorFromAvgEnergy( unnorm.GetBuffer() );
115  else if ( mType == 3 )
116  scaleFactor = ComputeScaleFactorFromDominantEnergy( unnorm.GetBuffer() );
117 
118  const TData invScaleFactor = 1.0 / scaleFactor;
119  DataArray& outBufferSamples = norm.GetBuffer();
120  const DataArray& inBufferSamples = unnorm.GetBuffer();
121 
122  for (int n=0; n<norm.GetSize(); n++)
123  outBufferSamples[n]=inBufferSamples[n]*invScaleFactor;
124 
125  return true;
126  }
127 
128 
// NOTE(review): the signature line for this body is missing from this listing;
// presumably this is ComputeScaleFactorFromMaxEnergy, taking the sample array
// `inAudio` -- confirm.
//
// Walks `inAudio` in consecutive frames of mFrameSize samples, obtains the
// energy of each frame from a Stats object, and keeps the largest energy among
// the frames whose energy exceeds a silence threshold.
// Returns 1.0 (and sends `true` through mIsSilenceCtrl) when that maximum is
// <= 1e-7; otherwise sends `false` and returns sqrt(maxEnergy / mFrameSize).
130  {
131  TIndex p = 0;
// Last start offset at which a whole frame still fits.
132  const TIndex end = inAudio.Size() - mFrameSize;
133  DataArray chunk;
134  TData maxEnergy = 0.0;
// Silence threshold, proportional to the frame size (0.3 for 4410 samples).
135  const TData quantizationThreshold = 0.3 * TData( mFrameSize ) / TData( 4410 );
136 
// NOTE(review): being a do-while, the body runs once even when inAudio holds
// fewer than mFrameSize samples; confirm callers always supply at least one
// full frame.
137  do
138  {
// Presumably makes `chunk` refer to the mFrameSize samples starting at p
// without copying -- confirm SetPtr semantics.
139  chunk.SetPtr( inAudio.GetPtr()+p, mFrameSize );
140  /* unused: TSize size = chunk.GetSize(); */
// `moments` is filled here but not read again in this body.
141  DataArray moments(4);
142  moments.SetSize(4);
143  Stats myStats(&chunk);
144  myStats.GetMoments(moments, FifthOrder);
145 
146  TData currentChunkEnergy = myStats.GetEnergy();
147 
148  //remove silence
149  if ( currentChunkEnergy > quantizationThreshold ) //seems to be just above noise due to 8 bits quantization
150  {
151  if(maxEnergy<currentChunkEnergy) maxEnergy=currentChunkEnergy;
152  }
153 
// Frames do not overlap: advance by one whole frame.
154  p += mFrameSize;
155 
156  } while (p <= end );
157 
158  // Enjoy the Silence...
159  if ( maxEnergy <= 1e-7 )
160  {
161  mIsSilenceCtrl.SendControl( true );
// A factor of 1.0 leaves the samples unchanged when the caller divides by it.
162  return 1.0;
163  }
164 
165  mIsSilenceCtrl.SendControl(false);
166 
167  return CLAM_sqrt( maxEnergy / TData(mFrameSize ) );
168 
169 
170  }
171 
// NOTE(review): the signature line for this body is missing from this listing;
// presumably this is ComputeScaleFactorFromAvgEnergy, taking the sample array
// `inAudio` -- confirm.
//
// Walks `inAudio` in consecutive frames of mFrameSize samples and sums the
// energy of every frame whose energy exceeds a silence threshold. The sum is
// then divided by the total number of samples in `inAudio`.
// Returns 1.0 (and sends `true` through mIsSilenceCtrl) when that average is
// <= 1e-7; otherwise sends `false` and returns sqrt(avgEnergy).
173  {
174 
175  TIndex p = 0;
// Last start offset at which a whole frame still fits.
176  const TIndex end = inAudio.Size() - mFrameSize;
177  DataArray chunk;
178  TData avgEnergy = 0.0;
// Silence threshold, proportional to the frame size (0.3 for 4410 samples).
179  const TData quantizationThreshold = 0.3 * TData( mFrameSize ) / TData( 4410 );
180 
// NOTE(review): being a do-while, the body runs once even when inAudio holds
// fewer than mFrameSize samples; confirm callers always supply at least one
// full frame.
181  do
182  {
// Presumably makes `chunk` refer to the mFrameSize samples starting at p
// without copying -- confirm SetPtr semantics.
183  chunk.SetPtr( inAudio.GetPtr()+p, mFrameSize );
// `moments` is filled here but not read again in this body.
184  DataArray moments(4);
185  moments.SetSize(4);
186  Stats myStats(&chunk);
187  myStats.GetMoments(moments, FifthOrder);
188 
189  TData currentChunkEnergy = myStats.GetEnergy();
190 
191  //remove silence
192  if ( currentChunkEnergy > quantizationThreshold ) //seems to be just above noise due to 8 bits quantization
193  {
194  avgEnergy += currentChunkEnergy;
195  }
196 
// Frames do not overlap: advance by one whole frame.
197  p += mFrameSize;
198 
199  } while (p <= end );
200 
// The divisor is the full sample count, including samples of frames that were
// skipped as silence above.
201  avgEnergy /= TData( inAudio.Size() );
202 
203  // Enjoy the Silence...
204  if ( avgEnergy <= 1e-7 )
205  {
206  mIsSilenceCtrl.SendControl( true );
// A factor of 1.0 leaves the samples unchanged when the caller divides by it.
207  return 1.0;
208  }
209 
210  mIsSilenceCtrl.SendControl(false);
211 
212  return CLAM_sqrt( avgEnergy );
213 
214  }
215 
// NOTE(review): the signature line for this body is missing from this listing;
// presumably this is ComputeScaleFactorFromDominantEnergy, taking the sample
// array `inAudio` -- confirm.
//
// Walks `inAudio` in consecutive frames of mFrameSize samples and collects the
// energy of every frame whose energy exceeds a silence threshold. The
// collected energies are sorted in ascending order and the value at the 90%
// position is selected.
// Returns 1.0 (and sends `true` through mIsSilenceCtrl) when no frame passed
// the threshold; otherwise sends `false` and returns
// sqrt(selectedEnergy / mFrameSize).
217  {
218  TIndex p = 0;
// Last start offset at which a whole frame still fits.
219  const TIndex end = inAudio.Size() - mFrameSize;
220  DataArray chunk;
221  DataArray chunksEnergies;
// Silence threshold, proportional to the frame size (0.3 for 4410 samples).
222  const TData quantizationThreshold = 0.3 * TData( mFrameSize ) / TData( 4410 );
223 
// NOTE(review): being a do-while, the body runs once even when inAudio holds
// fewer than mFrameSize samples; confirm callers always supply at least one
// full frame.
224  do
225  {
// Presumably makes `chunk` refer to the mFrameSize samples starting at p
// without copying -- confirm SetPtr semantics.
226  chunk.SetPtr( inAudio.GetPtr()+p, mFrameSize );
// `moments` is filled here but not read again in this body.
227  DataArray moments(4);
228  moments.SetSize(4);
229  Stats myStats(&chunk);
230  myStats.GetMoments(moments, FifthOrder);
231 
232  TData currentChunkEnergy = myStats.GetEnergy();
233 
234  //remove silence
235  if ( currentChunkEnergy > quantizationThreshold ) //seems to be just above noise due to 8 bits quantization
236  {
237  chunksEnergies.AddElem( currentChunkEnergy );
238  }
239 
// Frames do not overlap: advance by one whole frame.
240  p += mFrameSize;
241  } while (p <= end );
242 
243  // Enjoy the silence...
244  if ( chunksEnergies.Size() == 0 )
245  {
246  mIsSilenceCtrl.SendControl( true );
// A factor of 1.0 leaves the samples unchanged when the caller divides by it.
247  return 1.0;
248  }
249 
// NOTE(review): std::sort is declared in <algorithm>, which is not among the
// headers this file includes directly; presumably it arrives through one of
// the project headers -- confirm.
250  std::sort( chunksEnergies.GetPtr(), chunksEnergies.GetPtr()+chunksEnergies.Size() );
251 
252  //find the threshold under which lies percent% of the energy values
253  //that are not silence
254 
255  int percentage = 90;
256 
// Integer arithmetic: count * 90 / 100, rounded down.
257  int i = ( chunksEnergies.Size()*percentage ) / 100;
258 
// Convert the count into a zero-based index, without going below 0.
259  i = ( i == 0 ) ? i : i - 1;
260 
261  mIsSilenceCtrl.SendControl(false);
262 
263  return CLAM_sqrt( chunksEnergies[i]/TData(mFrameSize) );
264 
265  }
266 
267  void Normalization::CheckSilence( int size )
268  {
269  if (size==0)
270  mIsSilenceCtrl.SendControl(true);
271  else
272  mIsSilenceCtrl.SendControl(false);
273  }
274 
275 
276 }
277