Reading Input from wavfiles in sphinx4

Speech Recognition Engines

Flat

User: [email protected]
Date: 12/28/2009 12:50 am

Views: 10721
Rating: 3

Hello. I'm trying to read from a WAV file. The WAV file is supposed to contain digits as well as words, and I need to extract only the digits from it. First I tried the same program with a microphone, and it worked fine (with 20% accuracy), but now that I'm trying to provide input from a WAV file, it returns NULL.

Here are the .java and .xml files.

.java file

import edu.cmu.sphinx.frontend.util.Microphone;

import edu.cmu.sphinx.frontend.util.AudioFileDataSource;

import edu.cmu.sphinx.recognizer.Recognizer;

import edu.cmu.sphinx.result.Result;

import edu.cmu.sphinx.util.props.ConfigurationManager;

import java.util.StringTokenizer;

import java.io.File;

import java.net.MalformedURLException;

import java.net.URL;

/** A simple Sphinx-4 application that decodes a .WAV file containing connnected-digits audio data. */

public class WavFile

{

public static void main(String[] args) throws MalformedURLException

{

URL audioFileURL;

URL configURL;

// use defaults that are loaded from the WavFile.jar or use values provided as arguments to main

// if (args.length == 2) {

configURL = new File(args[0]).toURI().toURL();

audioFileURL = new File(args[1]).toURI().toURL();

// } else {

//audioFileURL = WavFile.class.getResource("12345.wav");

// configURL = WavFile.class.getResource("config.xml");

// }

System.out.println("Loading Recognizer as defined in '" + configURL.toString() + "'...\n");

ConfigurationManager cm = new ConfigurationManager(configURL);

// look up the recognizer (which will also lookup all its dependencies

Recognizer recognizer = (Recognizer) cm.lookup("recognizer");

// recognizer.allocate();

//Microphone microphone = (Microphone) cm.lookup("microphone");

//System.out.println(microphone);

/* allocate the resource necessary for the recognizer */

recognizer.allocate();

/* the microphone will keep recording until the program exits

if (microphone.startRecording())

{

System.out.println("Say any digit(s): e.g. \"two oh oh four\", " +

"\"three six five\".");

while (true)

{

System.out.println

("Start speaking. Press Ctrl-C to quit.\n");*/

// configure the audio input for the recognizer

AudioFileDataSource dataSource = (AudioFileDataSource) cm.lookup("audioFileDataSource");

dataSource.setAudioFile(audioFileURL, null);

// decode the audio file.

// System.out.println("Decoding " + audioFileURL);

Result result = recognizer.recognize();

System.out.println("The result is:" + result);

// String str=result.getBestPronunciationResult();

// if(!str.isEmpty()){

// StringTokenizer st = new StringTokenizer (dataSource);

// while (st.hasMoreTokens()) {

// System.out.println(st.nextToken());

//System.out.println(st);}

//System.out.println("Result: " + (result !=null ? result.getBestPronunciationResult() : null));

System.out.println("Result: " + (result != null ? result.getBestFinalResultNoFiller() : null));

}

This is the .xml file

<?xml version="1.0" encoding="UTF-8"?>

<!--

Sphinx-4 Configuration file

-->

<component name="recognizer"

type="edu.cmu.sphinx.recognizer.Recognizer">

<item>accuracyTracker </item>

<item>speedTracker </item>

<item>memoryTracker </item>

</propertylist>

</component>

</component>

<component name="searchManager"

type="edu.cmu.sphinx.decoder.search.SimpleBreadthFirstSearchManager">

</component>

<component name="activeList"

type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">

</component>

<component name="trivialPruner"

type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/>

<component name="threadedScorer"

type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">

</component>

<component name="flatLinguist"

type="edu.cmu.sphinx.linguist.flat.FlatLinguist">

<property name="wordInsertionProbability"

value="${wordInsertionProbability}"/>

</component>

<property name="grammarLocation"

value="file:/C:/F/Speech Recognition Project/sphinx4/"/>

</component>

<component name="dictionary"

type="edu.cmu.sphinx.linguist.dictionary.FastDictionary">

<property name="dictionaryPath"

value="file:/C:/F/Speech Recognition Project/sphinx4/edu/cmu/sphinx/model/acoustic/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz/dict/alN.dict"/>

<property name="fillerPath"

value="file:/C:/F/Speech Recognition Project/sphinx4/edu/cmu/sphinx/model/acoustic/WSJ_8gau_13dCep_8kHz_31mel_200Hz_3500Hz/dict/fillerdict"/>

</component>

<component name="wsj"

type="edu.cmu.sphinx.model.acoustic.WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz.Model">

</component>

<component name="wsjLoader"

type="edu.cmu.sphinx.model.acoustic.WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz.ModelLoader">

</component>

<component name="unitManager"

type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/>

<item>audioFileDataSource</item>

<item>dataBlocker </item>

<item>speechClassifier </item>

<item>speechMarker </item>

<item>nonSpeechDataFilter </item>

<item>preemphasizer </item>

<item>windower </item>

<item>melFilterBank </item>

<item>liveCMN </item>

<item>featureExtraction </item>

</propertylist>

</component>

<!-- <component name="preemphasizer"

type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>

<component name="dither"

type="edu.cmu.sphinx.frontend.filter.Dither"/>

<component name="windower"

type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower"/>

<component name="fft"

type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform"/>

<component name="melFilterBank"

type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank"/>

<component name="dct"

type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>

<component name="batchCMN"

type="edu.cmu.sphinx.frontend.feature.BatchCMN"/>

<component name="featureExtraction"

type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>-->

</component>

<component name="speechClassifier"

type="edu.cmu.sphinx.frontend.endpoint.SpeechClassifier">

</component>

<component name="nonSpeechDataFilter"

type="edu.cmu.sphinx.frontend.endpoint.NonSpeechDataFilter"/>

<component name="speechMarker"

type="edu.cmu.sphinx.frontend.endpoint.SpeechMarker" >

</component>

<component name="preemphasizer"

type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>

<component name="windower"

type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower">

</component>

<component name="fft"

type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform">

</component>

<component name="melFilterBank"

type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank">

</component>

<component name="dct"

type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>

<component name="liveCMN"

type="edu.cmu.sphinx.frontend.feature.LiveCMN"/>

<component name="featureExtraction"

type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>

<!-- <component name="microphone" type="edu.cmu.sphinx.frontend.util.Microphone">

</component>

*******************************************************

monitors

******************************************************* -->

<component name="accuracyTracker"

type="edu.cmu.sphinx.instrumentation.BestPathAccuracyTracker">

</component>

<component name="memoryTracker"

type="edu.cmu.sphinx.instrumentation.MemoryTracker">

</component>

<component name="speedTracker"

type="edu.cmu.sphinx.instrumentation.SpeedTracker">

</component>

</component>

</config>

Please Help!

--- (Edited on 12/28/2009 12:50 am [GMT-0600] by [email protected]) ---

Re: Reading Input from wavfiles in sphinx4

User: kmaclean
Date: 1/26/2010 9:58 pm

Views: 4130
Rating: 2

>Hello. I m trying to read from an wavfile.

Can you get Sphinx4 to recognize the speech from the wavfile directly (as opposed to doing it from your Java app)?

--- (Edited on 1/26/2010 10:58 pm [GMT-0500] by kmaclean) ---

Previous • Next •


Username	Password