
在Java中开始录制操作时,如何检测静音? 什么是PCM数据? 如何用Java计算PCM数据?


package bemukan.voiceRecognition.speechToText;

import javax.sound.sampled.*;
import java.io.*;

/**
 * Captures audio from a {@link TargetDataLine} on a background thread, keeps the
 * raw PCM bytes in memory and tracks the peak level of the most recently read
 * buffer so callers can detect silence.
 *
 * <p>The capture format is taken from the WAV file at {@code src/Facebook/1.wav};
 * the recording is written to {@code temp.wav} by {@link #getAudioFile()}.
 *
 * <p>Only 8-bit and 16-bit PCM are interpreted by the level meter; any sample
 * size other than 16 bits is treated as 8-bit.
 */
public class RecordAudio {

    final static float MAX_8_BITS_SIGNED = Byte.MAX_VALUE;
    final static float MAX_8_BITS_UNSIGNED = 0xff;
    final static float MAX_16_BITS_SIGNED = Short.MAX_VALUE;
    final static float MAX_16_BITS_UNSIGNED = 0xffff;

    /** WAV file whose header supplies the capture format. */
    private static final String FORMAT_TEMPLATE_PATH = "src/Facebook/1.wav";

    /** Destination of the recording written by {@link #getAudioFile()}. */
    private static final String OUTPUT_PATH = "temp.wav";

    /** Capture-loop flag; volatile because it is written by the caller and read by the capture thread. */
    protected volatile boolean running;

    /** Recorded PCM bytes; assigned before the capture thread starts. */
    private volatile ByteArrayOutputStream out;

    /** Format last read successfully from {@link #FORMAT_TEMPLATE_PATH}, or null if none was read. */
    private AudioFormat format;

    /** Peak level of the most recently captured buffer, written by the capture thread. */
    private volatile float level;

    /** Creates a recorder and tries to load the capture format; a failure is reported on stderr. */
    public RecordAudio() {
        getFormat();
    }

    /**
     * Reads the audio format from the template WAV file and stores it in {@link #format}.
     *
     * @return the format, or {@code null} if the file is missing or not a supported audio file
     */
    private AudioFormat getFormat() {
        File file = new File(FORMAT_TEMPLATE_PATH);
        // try-with-resources: the stream is only needed for its header.
        try (AudioInputStream stream = AudioSystem.getAudioInputStream(file)) {
            format = stream.getFormat();
            return format;
        } catch (UnsupportedAudioFileException | IOException e) {
            System.err.println("Cannot read audio format from " + file + ": " + e);
            return null;
        }
    }

    /** Asks the capture thread to stop after the read currently in progress. */
    public void stopAudio() {
        running = false;
    }

    /**
     * Returns the peak level of the most recently captured buffer.
     *
     * @return a value in {@code [0, 1]}; see {@link #calculateLevel(byte[], int, int)}
     */
    public float getLevel() {
        return level;
    }

    /**
     * Opens the capture line and starts a background thread that reads audio until
     * {@link #stopAudio()} is called. Each buffer's peak level is printed to stdout.
     *
     * <p>If the line is unavailable the message is printed and the JVM exits with status -2.
     *
     * @throws IllegalStateException if the capture format could not be loaded
     */
    public void recordAudio() {
        final AudioFormat captureFormat = getFormat();
        if (captureFormat == null) {
            throw new IllegalStateException(
                    "Audio format unavailable; could not read " + FORMAT_TEMPLATE_PATH);
        }
        try {
            DataLine.Info info = new DataLine.Info(TargetDataLine.class, captureFormat);
            final TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
            line.open(captureFormat);

            // One second of audio per read.
            int bufferSize = (int) captureFormat.getSampleRate() * captureFormat.getFrameSize();
            if (bufferSize <= 0) {
                line.close();
                throw new IllegalStateException("Unusable capture format: " + captureFormat);
            }
            final byte[] buffer = new byte[bufferSize];
            final ByteArrayOutputStream sink = new ByteArrayOutputStream();

            // Publish state before the thread starts so that an immediate
            // stopAudio() or getAudioFile() is not lost or met with a null buffer.
            out = sink;
            running = true;
            line.start();

            Thread captureThread = new Thread(new Runnable() {
                @Override
                public void run() {
                    try {
                        while (running) {
                            int count = line.read(buffer, 0, buffer.length);
                            if (count > 0) {
                                // Measure only the bytes actually read in this pass.
                                calculateLevel(buffer, 0, buffer.length - count);
                                System.out.println(level);
                                sink.write(buffer, 0, count);
                            }
                        }
                    } finally {
                        line.stop();
                        line.close();
                    }
                }
            });
            captureThread.start();
        } catch (LineUnavailableException e) {
            System.err.println("Line unavailable: " + e);
            System.exit(-2);
        }
    }

    /**
     * Writes everything recorded so far to {@code temp.wav} as a WAVE file.
     * An I/O failure is reported on stderr and the file handle is still returned.
     *
     * @return a handle to {@code temp.wav}
     * @throws IllegalStateException if nothing has been recorded or no format is known
     */
    public File getAudioFile() {
        ByteArrayOutputStream recorded = out;
        AudioFormat audioFormat = format;
        if (recorded == null || audioFormat == null) {
            throw new IllegalStateException("No audio has been recorded yet");
        }
        byte[] audio = recorded.toByteArray();
        File target = new File(OUTPUT_PATH);
        long frames = audio.length / audioFormat.getFrameSize();
        try (AudioInputStream ais =
                new AudioInputStream(new ByteArrayInputStream(audio), audioFormat, frames)) {
            AudioSystem.write(ais, AudioFileFormat.Type.WAVE, target);
            System.out.println("New file created!");
        } catch (IOException e) {
            System.err.println("Could not write " + target + ": " + e);
        }
        return target;
    }

    /**
     * Stores in {@link #level} the peak of {@code buffer[readPoint .. length - leftOver)}
     * as a fraction of full scale, clamped to {@code [0, 1]}.
     *
     * <p>Signed PCM uses the absolute sample value, so negative peaks count. Unsigned
     * PCM uses the raw sample value, so a silent unsigned signal reads about 0.5.
     *
     * @param buffer    PCM bytes in the layout described by {@link #format}
     * @param readPoint index of the first byte to inspect
     * @param leftOver  number of trailing bytes to ignore
     */
    private void calculateLevel(byte[] buffer, int readPoint, int leftOver) {
        AudioFormat fmt = format;
        boolean use16Bit = fmt.getSampleSizeInBits() == 16;
        boolean signed = AudioFormat.Encoding.PCM_SIGNED.equals(fmt.getEncoding());
        boolean bigEndian = fmt.isBigEndian();
        int end = buffer.length - leftOver;
        int max = 0;

        if (use16Bit) {
            // i + 1 < end: never read half a sample past the valid region.
            for (int i = readPoint; i + 1 < end; i += 2) {
                int hiByte = bigEndian ? buffer[i] : buffer[i + 1];
                int loByte = bigEndian ? buffer[i + 1] : buffer[i];
                // Mask both bytes: Java bytes are signed, and an unmasked low byte
                // >= 0x80 would sign-extend and overwrite the high byte.
                int raw = ((hiByte & 0xff) << 8) | (loByte & 0xff);
                int value = signed ? Math.abs((short) raw) : raw;
                max = Math.max(max, value);
            }
        } else {
            // 8 bit - no endianness issues, just sign.
            for (int i = readPoint; i < end; i++) {
                int value = signed ? Math.abs(buffer[i]) : (buffer[i] & 0xff);
                max = Math.max(max, value);
            }
        }

        float fullScale;
        if (signed) {
            fullScale = use16Bit ? MAX_16_BITS_SIGNED : MAX_8_BITS_SIGNED;
        } else {
            fullScale = use16Bit ? MAX_16_BITS_UNSIGNED : MAX_8_BITS_UNSIGNED;
        }
        // The most negative signed sample is one past full scale; clamp it.
        level = Math.min(1.0f, max / fullScale);
    }
}






我不太明白这个问题。 但如果它与speech-recognition标签有关,我有一些坏消息。 理论上,这可以使用Java Speech API完成。 但显然该API没有可用的“语音到文本”实现(只有“文本到语音”)。

我必须为语音识别项目计算rms。 但我不知道如何用Java计算。


 /** Computes the RMS volume of a group of signal sizes ranging from -1 to 1. */ public double volumeRMS(double[] raw) { double sum = 0d; if (raw.length==0) { return sum; } else { for (int ii=0; ii 




请根据您的要求调整您的代码! 在这种情况下,一个名为UMBRAL的变量(西班牙语中的“阈值”)……

假设您可以访问WAV文件,如字节ByteHeader …

 /**
  * Assembles a 32-bit integer from four bytes supplied most-significant first.
  *
  * <p>The three lower bytes are masked with {@code 0xFF} so their sign bits do not
  * leak into higher positions. The top byte is not masked, so the result is
  * negative whenever {@code Byte24} has its high bit set.
  *
  * @param Byte24 bits 24-31 of the result
  * @param Byte16 bits 16-23 of the result
  * @param Byte08 bits 8-15 of the result
  * @param Byte00 bits 0-7 of the result
  * @return the combined value, boxed
  */
 private Integer Byte2PosIntBig(byte Byte24, byte Byte16, byte Byte08, byte Byte00) {
     int combined = (Byte24 << 24)
             | ((Byte16 & 0xFF) << 16)
             | ((Byte08 & 0xFF) << 8)
             | (Byte00 & 0xFF);
     // Integer.valueOf replaces the deprecated Integer(int) constructor.
     return Integer.valueOf(combined);
 }


  // Open the source WAV file read-only ("r") for random access.
  // NOTE(review): no close() is visible in this snippet; confirm the caller closes RAFSource.
  RandomAccessFile RAFSource = new RandomAccessFile("your old file wav", "r"); 


 int PSData = 44; byte[] Bytes = new byte[4]; byte[] ByteHeader = new byte[44]; RAFSource.seek(0); RAFSource.read(ByteHeader); int WavSize = Byte2PosIntBig(ByteHeader[43],ByteHeader[42],ByteHeader[41],ByteHeader[40]); int NumBits = Byte2PosIntBig(ByteHeader[35],ByteHeader[34]); int NumByte = NumBits/8; for (int i = PSData;i < PSData+WavSize;i+=NumByte) { int WavSample = 0; int WavResultI =0; int WavResultO = 0; if (NumByte == 2) { RAFSource.seek(i); Bytes[0] = RAFSource.readByte(); Bytes[1] = RAFSource.readByte(); WavSample = (int)(((Bytes[1]) << 8)|((Bytes[0] & 0xFF) << 0)); if (Math.abs(WavSample) < UMBRAL) { //SILENCE DETECTED!!! } } else { RAFSource.seek(i); WavSample = (short)(RAFSource.readByte() & 0xFF); short sSamT = (short)WavSample; sSamT += 128; double dSamD = (double)sSamT*Multiplier; if ((double)sSamT < UMBRAL) { //SILENCE DETECTED!!! } }