VoxForge
Hello. I'm trying to read from a WAV file. The WAV file is supposed to contain digits as well as words, and I need to extract only the digits from it. I first tried the same program with the microphone and it worked fine (with about 20% accuracy), but now that I'm trying to provide the input from a WAV file, it returns NULL.
Here are the .java and .xml files:
.java file
import edu.cmu.sphinx.frontend.util.Microphone;
import edu.cmu.sphinx.frontend.util.AudioFileDataSource;
import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.props.ConfigurationManager;
import java.util.StringTokenizer;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * A simple Sphinx-4 application that decodes a .WAV file containing connected-digits audio data.
 *
 * <p>Usage: {@code java WavFile <config.xml> <audio.wav>}
 */
public class WavFile
{
    /**
     * Loads the recognizer described by the configuration file, points the
     * {@code audioFileDataSource} component at the given audio file, runs one recognition
     * pass and prints the result.
     *
     * @param args {@code args[0]} is the path of the Sphinx-4 XML configuration file,
     *             {@code args[1]} is the path of the audio file to decode
     * @throws MalformedURLException if either path cannot be converted to a URL
     */
    public static void main(String[] args) throws MalformedURLException
    {
        // Both paths are mandatory; fail with a usage message instead of an
        // ArrayIndexOutOfBoundsException when they are missing.
        if (args == null || args.length < 2)
        {
            System.err.println("Usage: java WavFile <config.xml> <audio.wav>");
            return;
        }

        File configFile = new File(args[0]);
        File audioFile = new File(args[1]);

        // Report a missing input up front rather than failing later inside the recognizer.
        if (!configFile.isFile())
        {
            System.err.println("Configuration file not found: " + configFile.getAbsolutePath());
            return;
        }
        if (!audioFile.isFile())
        {
            System.err.println("Audio file not found: " + audioFile.getAbsolutePath());
            return;
        }

        URL configURL = configFile.toURI().toURL();
        URL audioFileURL = audioFile.toURI().toURL();

        System.out.println("Loading Recognizer as defined in '" + configURL.toString() + "'...\n");
        ConfigurationManager cm = new ConfigurationManager(configURL);

        // Look up the recognizer (which will also look up all its dependencies).
        Recognizer recognizer = (Recognizer) cm.lookup("recognizer");

        // Allocate the resources necessary for the recognizer.
        recognizer.allocate();
        try
        {
            // Configure the audio input for the recognizer.
            AudioFileDataSource dataSource = (AudioFileDataSource) cm.lookup("audioFileDataSource");
            dataSource.setAudioFile(audioFileURL, null);

            // Decode the audio file. The result may be null, so guard before dereferencing it.
            Result result = recognizer.recognize();
            System.out.println("The result is:" + result);
            System.out.println("Result: " + (result != null ? result.getBestFinalResultNoFiller() : null));
        }
        finally
        {
            // Release what allocate() acquired, even if decoding fails.
            recognizer.deallocate();
        }
    }
}
This is the .xml file:
<?xml version="1.0" encoding="UTF-8"?>
<!--
Sphinx-4 Configuration file
-->
<!-- ******************************************************** -->
<!-- tidigits configuration file -->
<!-- ******************************************************** -->
<config>
    <!-- ******************************************************** -->
    <!-- frequently tuned properties -->
    <!-- Several of these are referenced by the components below -->
    <!-- as ${name}. -->
    <!-- ******************************************************** -->
    <property name="logLevel" value="WARNING"/>
    <property name="absoluteBeamWidth" value="-1"/>
    <property name="relativeBeamWidth" value="1E-80"/>
    <property name="wordInsertionProbability" value="1E-36"/>
    <property name="languageWeight" value="8"/>
    <property name="recognizer" value="recognizer"/>
    <property name="linguist" value="flatLinguist"/>
    <property name="frontend" value="mfcFrontEnd"/>
    <property name="showCreations" value="true"/>

    <!-- ******************************************************** -->
    <!-- The connectedDigitsRecognizer configuration -->
    <!-- ******************************************************** -->
    <component name="recognizer"
               type="edu.cmu.sphinx.recognizer.Recognizer">
        <property name="decoder" value="decoder"/>
        <propertylist name="monitors">
            <item>accuracyTracker </item>
            <item>speedTracker </item>
            <item>memoryTracker </item>
        </propertylist>
    </component>

    <!-- ******************************************************** -->
    <!-- The Decoder configuration -->
    <!-- ******************************************************** -->
    <component name="decoder" type="edu.cmu.sphinx.decoder.Decoder">
        <property name="searchManager" value="searchManager"/>
    </component>

    <component name="searchManager"
               type="edu.cmu.sphinx.decoder.search.SimpleBreadthFirstSearchManager">
        <property name="logMath" value="logMath"/>
        <property name="linguist" value="flatLinguist"/>
        <property name="pruner" value="trivialPruner"/>
        <property name="scorer" value="threadedScorer"/>
        <property name="activeListFactory" value="activeList"/>
    </component>

    <component name="activeList"
               type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
        <property name="logMath" value="logMath"/>
        <property name="absoluteBeamWidth" value="${absoluteBeamWidth}"/>
        <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
    </component>

    <component name="trivialPruner"
               type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/>

    <component name="threadedScorer"
               type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">
        <property name="frontend" value="${frontend}"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The linguist configuration -->
    <!-- ******************************************************** -->
    <component name="flatLinguist"
               type="edu.cmu.sphinx.linguist.flat.FlatLinguist">
        <property name="logMath" value="logMath"/>
        <property name="grammar" value="jsgfGrammar"/>
        <property name="acousticModel" value="wsj"/>
        <property name="wordInsertionProbability"
                  value="${wordInsertionProbability}"/>
        <property name="languageWeight" value="${languageWeight}"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The Grammar configuration -->
    <!-- ******************************************************** -->
    <component name="jsgfGrammar" type="edu.cmu.sphinx.jsapi.JSGFGrammar">
        <property name="dictionary" value="dictionary"/>
        <property name="grammarLocation"
                  value="file:/C:/F/Speech Recognition Project/sphinx4/"/>
        <property name="grammarName" value="digits"/>
        <property name="logMath" value="logMath"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The Dictionary configuration -->
    <!-- Both dictionary files are taken from the 16 kHz WSJ model -->
    <!-- directory so that they match the acoustic model below. -->
    <!-- NOTE(review): confirm that dict/fillerdict exists there. -->
    <!-- ******************************************************** -->
    <component name="dictionary"
               type="edu.cmu.sphinx.linguist.dictionary.FastDictionary">
        <property name="dictionaryPath"
                  value="file:/C:/F/Speech Recognition Project/sphinx4/edu/cmu/sphinx/model/acoustic/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz/dict/alN.dict"/>
        <property name="fillerPath"
                  value="file:/C:/F/Speech Recognition Project/sphinx4/edu/cmu/sphinx/model/acoustic/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz/dict/fillerdict"/>
        <property name="addSilEndingPronunciation" value="false"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The acoustic model configuration -->
    <!-- ******************************************************** -->
    <component name="wsj"
               type="edu.cmu.sphinx.model.acoustic.WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz.Model">
        <property name="loader" value="wsjLoader"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <component name="wsjLoader"
               type="edu.cmu.sphinx.model.acoustic.WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz.ModelLoader">
        <property name="logMath" value="logMath"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The unit manager configuration -->
    <!-- ******************************************************** -->
    <component name="unitManager"
               type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/>

    <!-- ******************************************************** -->
    <!-- The frontend configuration -->
    <!-- The pipeline starts at audioFileDataSource, whose audio -->
    <!-- file is set by the application at run time. -->
    <!-- ******************************************************** -->
    <component name="mfcFrontEnd" type="edu.cmu.sphinx.frontend.FrontEnd">
        <propertylist name="pipeline">
            <item>audioFileDataSource</item>
            <item>dataBlocker </item>
            <item>speechClassifier </item>
            <item>speechMarker </item>
            <item>nonSpeechDataFilter </item>
            <item>preemphasizer </item>
            <item>windower </item>
            <item>fft </item>
            <item>melFilterBank </item>
            <item>dct </item>
            <item>liveCMN </item>
            <item>featureExtraction </item>
        </propertylist>
    </component>

    <component name="audioFileDataSource" type="edu.cmu.sphinx.frontend.util.AudioFileDataSource"/>

    <!-- Disabled component definitions kept for reference; the active
         definitions used by the pipeline follow further below.
    <component name="preemphasizer"
    type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>
    <component name="dither"
    type="edu.cmu.sphinx.frontend.filter.Dither"/>
    <component name="windower"
    type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower"/>
    <component name="fft"
    type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform"/>
    <component name="melFilterBank"
    type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank"/>
    <component name="dct"
    type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>
    <component name="batchCMN"
    type="edu.cmu.sphinx.frontend.feature.BatchCMN"/>
    <component name="featureExtraction"
    type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>-->

    <!-- ******************************************************** -->
    <!-- The frontend pipelines -->
    <!-- ******************************************************** -->
    <component name="dataBlocker" type="edu.cmu.sphinx.frontend.DataBlocker">
        <!--<property name="blockSizeMs" value="10"/>-->
    </component>

    <component name="speechClassifier"
               type="edu.cmu.sphinx.frontend.endpoint.SpeechClassifier">
        <property name="threshold" value="13"/>
    </component>

    <component name="nonSpeechDataFilter"
               type="edu.cmu.sphinx.frontend.endpoint.NonSpeechDataFilter"/>

    <component name="speechMarker"
               type="edu.cmu.sphinx.frontend.endpoint.SpeechMarker" >
        <property name="speechTrailer" value="50"/>
    </component>

    <component name="preemphasizer"
               type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>

    <component name="windower"
               type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower">
    </component>

    <component name="fft"
               type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform">
    </component>

    <component name="melFilterBank"
               type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank">
    </component>

    <component name="dct"
               type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>

    <component name="liveCMN"
               type="edu.cmu.sphinx.frontend.feature.LiveCMN"/>

    <component name="featureExtraction"
               type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>

    <!-- Microphone input is disabled; audio comes from audioFileDataSource.
    <component name="microphone" type="edu.cmu.sphinx.frontend.util.Microphone">
    <property name="closeBetweenUtterances" value="false"/>
    </component>
    -->

    <!-- ******************************************************* -->
    <!-- monitors -->
    <!-- ******************************************************* -->
    <component name="accuracyTracker"
               type="edu.cmu.sphinx.instrumentation.BestPathAccuracyTracker">
        <property name="recognizer" value="${recognizer}"/>
        <property name="showAlignedResults" value="false"/>
        <property name="showRawResults" value="false"/>
    </component>

    <component name="memoryTracker"
               type="edu.cmu.sphinx.instrumentation.MemoryTracker">
        <property name="recognizer" value="${recognizer}"/>
        <property name="showSummary" value="false"/>
        <property name="showDetails" value="false"/>
    </component>

    <component name="speedTracker"
               type="edu.cmu.sphinx.instrumentation.SpeedTracker">
        <property name="recognizer" value="${recognizer}"/>
        <property name="frontend" value="${frontend}"/>
        <property name="showSummary" value="true"/>
        <property name="showDetails" value="false"/>
    </component>

    <!-- ******************************************************* -->
    <!-- Miscellaneous components -->
    <!-- ******************************************************* -->
    <component name="logMath" type="edu.cmu.sphinx.util.LogMath">
        <property name="logBase" value="1.0001"/>
        <property name="useAddTable" value="true"/>
    </component>
</config>
--- (Edited on 12/28/2009 12:50 am [GMT-0600] by [email protected]) ---
>Hello. I m trying to read from an wavfile.
Can you get Sphinx4 to recognize the speech from the wavfile directly (as opposed to doing it from your Java app)?
--- (Edited on 1/26/2010 10:58 pm [GMT-0500] by kmaclean) ---