Skip to content

Commit

Permalink
url to wav
Browse files Browse the repository at this point in the history
  • Loading branch information
jcbang committed Sep 26, 2020
1 parent e2a97d5 commit dd2b809
Show file tree
Hide file tree
Showing 34 changed files with 2,237 additions and 202 deletions.
Binary file not shown.
Binary file modified gene-pool-backend/.vs/gene-pool-backend/v16/.suo
Binary file not shown.
113 changes: 76 additions & 37 deletions gene-pool-backend/Controllers/SpeechToTextController.cs
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using MediaToolkit;
using MediaToolkit.Model;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using VideoLibrary;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using System.Drawing.Printing;
using System.Reflection;
using System.IO;
using System.Text;
using NAudio.Wave;
using Azure.Storage.Blobs;
using Azure.Storage.Blobs.Models;
using VideoLibrary;
using MediaToolkit.Model;
using NAudio.Utils;

namespace gene_pool_backend.Controllers {
public class MyFile {
Expand All @@ -33,30 +32,18 @@ public async Task<IActionResult> WavFileTranscribe([FromForm] MyFile files) {
var config = SpeechConfig.FromSubscription("0afaff0095f946eaa101f44563f3c341", "eastus2");
var stopRecognition = new TaskCompletionSource<int>();

/*
byte channels = 1;
byte bitsPerSample = 16;
int samplesPerSecond = 16000; // or 8000
var audioFormat = AudioStreamFormat.GetWaveFormatPCM((uint)samplesPerSecond, bitsPerSample, channels);
PushAudioInputStream audioStream = AudioInputStream.CreatePushStream(audioFormat);
BinaryReader reader = new BinaryReader(files.file.OpenReadStream());
audioStream.Write(reader.ReadBytes((int) files.file.Length));
audioStream.Close();
*/

Console.WriteLine(Path.GetFullPath("test.wav"));

StringBuilder sb = new StringBuilder();

// using (var audioConfig = AudioConfig.FromStreamInput(audioStream)) {
using (var audioConfig = AudioConfig.FromWavFileInput(Path.GetFullPath("test.wav"))) {
using (var audioConfig = AudioConfig.FromWavFileInput(Path.GetFullPath("helloworld.wav"))) {
using (var recognizer = new SpeechRecognizer(config, audioConfig)) {
// Subscribes to events.
recognizer.Recognized += (s, e) =>
{
if (e.Result.Reason == ResultReason.RecognizedSpeech) {
// Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
sb.Append(e.Result.Text);
} else if (e.Result.Reason == ResultReason.NoMatch) {
Console.WriteLine($"NOMATCH: Speech could not be recognized.");
Expand Down Expand Up @@ -105,28 +92,82 @@ public async Task<IActionResult> WavFileTranscribe([FromForm] MyFile files) {
return Ok();
}

// Storage credentials must never live in source control, so the connection string is read
// from the GENEPOOL_STORAGE_CONNECTION_STRING environment variable instead of a literal.
// NOTE(review): the account key that used to be hard-coded on this line is part of the
// repository history and should be rotated in the Azure portal.
// The value is null when the variable is not set; endpoints that build a BlobServiceClient
// from it are expected to fail at that point -- TODO confirm the desired failure mode.
string connectionString = Environment.GetEnvironmentVariable("GENEPOOL_STORAGE_CONNECTION_STRING");

// Name of the blob container that holds uploaded audio.
string containerName = "genepoolcontainer";

// Base name (without extension) of the single blob that upload/transcribe operate on.
string blobFileName = "helloworld";

[HttpPost]
[Route("byte_file_transcribe")]
public async Task<IActionResult> ByteFileTranscribe([FromForm] MyFile files) {
Console.WriteLine(files);
Console.WriteLine(files.test);
Console.WriteLine(files.file);
[Route("upload_file")]
/// <summary>
/// Uploads the posted form file to blob storage as "{blobFileName}.wav", overwriting any
/// existing blob with that name.
/// </summary>
/// <param name="files">Form payload; its <c>file</c> member is the audio to upload.</param>
/// <returns>
/// 200 OK once the upload has completed, or 400 Bad Request when no file was posted.
/// </returns>
public async Task<IActionResult> UploadFile([FromForm] MyFile files) {
    // Reject the request up front instead of failing later with a NullReferenceException.
    if (files == null || files.file == null) {
        return BadRequest("No file was supplied in the form data.");
    }

    // Create a BlobServiceClient object which will be used to create a container client
    var blobServiceClient = new BlobServiceClient(connectionString);

    // Get the container client and create the container only when it is missing.
    // Unlike a blanket try/catch around CreateBlobContainerAsync, this lets genuine
    // failures (bad credentials, network errors) surface instead of being mistaken
    // for "container already exists".
    BlobContainerClient containerClient = blobServiceClient.GetBlobContainerClient(containerName);
    await containerClient.CreateIfNotExistsAsync();

    string fileName = $"{blobFileName}.wav";

    // Get a reference to a blob
    BlobClient blobClient = containerClient.GetBlobClient(fileName);

    Console.WriteLine("Uploading to Blob storage as blob:\n\t {0}\n", blobClient.Uri);

    // Open the file and upload its data; 'true' overwrites an existing blob.
    using (var stream = files.file.OpenReadStream()) {
        await blobClient.UploadAsync(stream, true);
    }

    return Ok();
}

[HttpPost]
[Route("link_to_wav")]
/// <summary>
/// Downloads a fixed YouTube video to disk and converts the saved "hello.mp4" into
/// "hello.wav" through <c>FileHelpers</c>.
/// </summary>
/// <param name="files">Form payload; not used by this action.</param>
/// <returns>Always 200 OK once both helper calls have returned.</returns>
public async Task<IActionResult> LinkToWav([FromForm] MyFile files) {
    const string videoUrl = "https://www.youtube.com/watch?v=tpIctyqH29Q";
    const string downloadedVideoPath = "hello.mp4";
    const string convertedAudioPath = "hello.wav";

    // Step 1: fetch the video (the helper writes it to "hello.mp4").
    FileHelpers.SaveVideoToDisk(videoUrl);

    // Step 2: convert the downloaded video into a wav file.
    FileHelpers.ToWavFormat(downloadedVideoPath, convertedAudioPath);

    IActionResult response = Ok();
    return response;
}

[HttpPost]
[Route("blob_to_text")]
public async Task<IActionResult> BlobToText([FromForm] MyFile files) {
var config = SpeechConfig.FromSubscription("0afaff0095f946eaa101f44563f3c341", "eastus2");

var stopRecognition = new TaskCompletionSource<int>();

Console.WriteLine(Path.GetFullPath("test.wav"));
// Create a BlobServiceClient object which will be used to create a container client
BlobServiceClient blobServiceClient = new BlobServiceClient(connectionString);

StringBuilder sb = new StringBuilder();
// Create the container and return a container client object
BlobContainerClient containerClient;
try {
containerClient = await blobServiceClient.CreateBlobContainerAsync(containerName);
} catch {
containerClient = blobServiceClient.GetBlobContainerClient(containerName);
}

// using (var audioConfig = AudioConfig.FromStreamInput(audioStream)) {
using (var audioConfig = AudioConfig.FromWavFileInput(Path.GetFullPath("test.wav"))) {
using (var recognizer = new SpeechRecognizer(config, audioConfig)) {
BlobClient blobClient = containerClient.GetBlobClient($"{blobFileName}.wav");
BlobDownloadInfo download = await blobClient.DownloadAsync();

// Create an audio stream from a wav file.
// Replace with your own audio file name.
using (var audioInput = Utility.OpenWavFile(new BinaryReader(download.Content))) {
// Creates a speech recognizer using audio stream input.
using (var recognizer = new SpeechRecognizer(config, audioInput)) {
// Subscribes to events.
recognizer.Recognizing += (s, e) => {
Console.WriteLine($"RECOGNIZING: Text={e.Result.Text}");
};

recognizer.Recognized += (s, e) => {
if (e.Result.Reason == ResultReason.RecognizedSpeech) {
// Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
sb.Append(e.Result.Text);
Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
} else if (e.Result.Reason == ResultReason.NoMatch) {
Console.WriteLine($"NOMATCH: Speech could not be recognized.");
}
Expand All @@ -145,11 +186,11 @@ public async Task<IActionResult> ByteFileTranscribe([FromForm] MyFile files) {
};

recognizer.SessionStarted += (s, e) => {
Console.WriteLine("\n Session started event.");
Console.WriteLine("\nSession started event.");
};

recognizer.SessionStopped += (s, e) => {
Console.WriteLine("\n Session stopped event.");
Console.WriteLine("\nSession stopped event.");
Console.WriteLine("\nStop recognition.");
stopRecognition.TrySetResult(0);
};
Expand All @@ -166,8 +207,6 @@ public async Task<IActionResult> ByteFileTranscribe([FromForm] MyFile files) {
}
}

Console.WriteLine(sb.ToString());

return Ok();
}
}
Expand Down
172 changes: 172 additions & 0 deletions gene-pool-backend/FileManipulation.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
using NAudio.Wave;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
using System.Threading.Tasks;
using VideoLibrary;

namespace gene_pool_backend {
/// <summary>
/// Helpers for building a mono, 16 kHz, 16-bit PCM WAV file incrementally on a seekable stream.
/// </summary>
public static class BinaryWriterExtensions {
    // Size in bytes of the canonical RIFF/WAVE header written by UpdateHeader.
    private const int HeaderSize = 44;

    private const int Hz = 16000; // sampling rate in samples per second

    // Format parameters. The header fields derived from them (byte rate, block align) must
    // all agree, otherwise players mis-interpret the sample data.
    private const short Channels = 1;
    private const short BitsPerSample = 16;
    private const short BytesPerSample = BitsPerSample / 8;

    private const float RescaleFactor = 32767; // scales a [-1, 1] float to the Int16 range

    /// <summary>
    /// Appends float samples to the stream as 16-bit little-endian PCM and rewrites the
    /// WAV header so its size fields match the new length.
    /// </summary>
    /// <param name="stream">
    /// Seekable, writable stream. When it holds no more than a header's worth of bytes it
    /// is (re)initialised to exactly one header before the samples are written.
    /// </param>
    /// <param name="buffer">
    /// Samples nominally in [-1, 1]; values outside that range are clamped and NaN is
    /// written as silence.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="stream"/> or <paramref name="buffer"/> is null.
    /// </exception>
    public static void AppendWaveData<T>(this T stream, float[] buffer)
        where T : Stream {
        if (stream == null) {
            throw new ArgumentNullException(nameof(stream));
        }
        if (buffer == null) {
            throw new ArgumentNullException(nameof(buffer));
        }

        if (stream.Length > HeaderSize) {
            // Existing audio present: append after it.
            stream.Seek(0, SeekOrigin.End);
        } else {
            // Empty or header-only stream: reserve room for the header.
            stream.SetLength(HeaderSize);
            stream.Position = HeaderSize;
        }

        // Encode every sample explicitly as little-endian, as the WAV format requires,
        // rather than relying on the machine's native byte order.
        var pcm = new byte[buffer.Length * BytesPerSample];
        for (int i = 0; i < buffer.Length; i++) {
            short sample = ToPcm16(buffer[i]);
            pcm[i * BytesPerSample] = (byte)(sample & 0xFF);
            pcm[i * BytesPerSample + 1] = (byte)((sample >> 8) & 0xFF);
        }

        // write to stream
        stream.Write(pcm, 0, pcm.Length);

        // Update Header
        UpdateHeader(stream);
    }

    /// <summary>
    /// Writes the 44-byte RIFF/WAVE header at the start of the stream, using the stream's
    /// current length for the size fields. The stream position is left just after the header.
    /// </summary>
    /// <param name="stream">Seekable, writable stream; it is left open.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public static void UpdateHeader(Stream stream) {
        if (stream == null) {
            throw new ArgumentNullException(nameof(stream));
        }

        // Guarantee room for the header so the size fields can never go negative.
        if (stream.Length < HeaderSize) {
            stream.SetLength(HeaderSize);
        }

        long totalLength = stream.Length;

        // leaveOpen: the caller owns the stream; disposing the writer only flushes it.
        using (var writer = new BinaryWriter(stream, Encoding.ASCII, true)) {
            writer.Seek(0, SeekOrigin.Begin);

            writer.Write(Encoding.ASCII.GetBytes("RIFF"));            // RIFF chunk id
            writer.Write((int)(totalLength - 8));                     // RIFF chunk size = file size - 8
            writer.Write(Encoding.ASCII.GetBytes("WAVE"));            // RIFF type
            writer.Write(Encoding.ASCII.GetBytes("fmt "));            // format chunk id (trailing space)
            writer.Write(16);                                         // format chunk size for PCM
            writer.Write((short)1);                                   // audio format: 1 = PCM
            writer.Write(Channels);                                   // number of channels
            writer.Write(Hz);                                         // sample rate (32-bit integer)
            writer.Write(Hz * Channels * BytesPerSample);             // byte rate
            writer.Write((short)(Channels * BytesPerSample));         // block align: bytes per sample frame
            writer.Write(BitsPerSample);                              // bits per single-channel sample
            writer.Write(Encoding.ASCII.GetBytes("data"));            // data chunk id
            writer.Write((int)(totalLength - HeaderSize));            // data chunk size in bytes
        }
    }

    // Converts one float sample to signed 16-bit PCM, clamping to [-1, 1] first so the
    // cast is always well defined; NaN becomes silence.
    private static short ToPcm16(float sample) {
        if (float.IsNaN(sample)) {
            return 0;
        }

        float clamped = Math.Max(-1f, Math.Min(1f, sample));
        return (short)(clamped * RescaleFactor);
    }
} //end of class

/// <summary>
/// File and stream utilities used to turn downloaded video into wav audio.
/// </summary>
public static class FileHelpers {
    /// <summary>
    /// Reads an entire stream into a byte array. A seekable stream is read from its
    /// beginning and has its original position restored afterwards; a non-seekable
    /// stream is read from its current position.
    /// </summary>
    /// <param name="stream">Stream to read.</param>
    /// <returns>All bytes read from the stream.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public static byte[] ReadToEnd(Stream stream) {
        if (stream == null) {
            throw new ArgumentNullException(nameof(stream));
        }

        bool canSeek = stream.CanSeek;
        long originalPosition = 0;

        if (canSeek) {
            originalPosition = stream.Position;
            stream.Position = 0;
        }

        try {
            using (var collected = new MemoryStream()) {
                stream.CopyTo(collected);
                return collected.ToArray();
            }
        } finally {
            // Put the caller's stream back where it was.
            if (canSeek) {
                stream.Position = originalPosition;
            }
        }
    }

    /// <summary>
    /// Decodes consecutive 4-byte groups as big-endian IEEE-754 single-precision floats.
    /// Trailing bytes that do not fill a whole group are ignored, and the input array
    /// is not modified.
    /// </summary>
    /// <param name="array">Raw bytes to decode.</param>
    /// <returns>One float per complete 4-byte group.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
    public static float[] ConvertByteToFloat(byte[] array) {
        if (array == null) {
            throw new ArgumentNullException(nameof(array));
        }

        float[] floatArr = new float[array.Length / 4];
        byte[] group = new byte[4];

        for (int i = 0; i < floatArr.Length; i++) {
            // Work on a scratch copy so the caller's data is never reordered.
            Buffer.BlockCopy(array, i * 4, group, 0, 4);
            if (BitConverter.IsLittleEndian) {
                Array.Reverse(group);
            }
            floatArr[i] = BitConverter.ToSingle(group, 0);
        }

        return floatArr;
    }

    /// <summary>
    /// Legacy conversion path: decodes the given media bytes to PCM via NAudio and appends
    /// the result to "hello.wav" in the current directory.
    /// </summary>
    /// <param name="video">Media file content as bytes.</param>
    // NOTE(review): the PCM bytes are reinterpreted as 32-bit floats by ConvertByteToFloat,
    // which looks unlikely to match the decoder's sample format -- confirm before reuse.
    public static void ConvertToWAVOLD(byte [] video) {
        // contentAsByteArray consists of video bytes
        MemoryStream contentAsMemoryStream = new MemoryStream(video);

        using (WaveStream pcmStream =
            WaveFormatConversionStream.CreatePcmStream(
                new StreamMediaFoundationReader(contentAsMemoryStream))) {
            WaveStream blockAlignReductionStream = new BlockAlignReductionStream(pcmStream);

            // OpenOrCreate keeps an existing file so the new samples are appended to it.
            using (var stream = new FileStream("hello.wav", FileMode.OpenOrCreate, FileAccess.ReadWrite)) {
                stream.AppendWaveData(ConvertByteToFloat(ReadToEnd(blockAlignReductionStream)));
            }
        }
    }

    /// <summary>
    /// Path of the ffmpeg executable. Defaults to "ffmpeg.exe" the first time
    /// <see cref="ToWavFormat"/> runs if nothing has been assigned.
    /// </summary>
    public static string PathToFfmpeg { get; set; }

    /// <summary>
    /// Runs ffmpeg to convert <paramref name="pathToMp4"/> into <paramref name="pathToWav"/>,
    /// echoing ffmpeg's stderr output to the console. Failures are logged, not thrown.
    /// </summary>
    /// <param name="pathToMp4">Input media file.</param>
    /// <param name="pathToWav">Output wav file.</param>
    // NOTE(review): ffmpeg may prompt before overwriting an existing output file; since
    // stdin is not redirected this could block -- confirm whether "-y" should be passed.
    public static void ToWavFormat(string pathToMp4, string pathToWav) {
        // Only fall back to the default when the caller has not configured a path;
        // unconditionally assigning here would make the property pointless.
        if (string.IsNullOrEmpty(PathToFfmpeg)) {
            PathToFfmpeg = "ffmpeg.exe";
        }

        // 'using' releases the process handle on every exit path, including failures.
        using (var ffmpeg = new Process()) {
            ffmpeg.StartInfo.UseShellExecute = false;
            ffmpeg.StartInfo.RedirectStandardError = true;
            ffmpeg.StartInfo.FileName = PathToFfmpeg;
            ffmpeg.StartInfo.Arguments = $"-i \"{pathToMp4}\" \"{pathToWav}\"";

            try {
                if (!ffmpeg.Start()) {
                    Console.WriteLine("Error starting");
                    return;
                }

                // ffmpeg reports progress and errors on stderr; drain it until it closes.
                var reader = ffmpeg.StandardError;
                string line;
                while ((line = reader.ReadLine()) != null) {
                    Console.WriteLine(line);
                }

                // Make sure the output file is fully written before returning.
                ffmpeg.WaitForExit();
                if (ffmpeg.ExitCode != 0) {
                    Console.WriteLine($"ffmpeg exited with code {ffmpeg.ExitCode}");
                }
            } catch (Exception exception) {
                // Best effort by design: report the problem and carry on.
                Console.WriteLine(exception.ToString());
            }
        }
    }

    /// <summary>
    /// Downloads the video behind <paramref name="link"/> and writes it to "hello.mp4"
    /// in the current directory.
    /// </summary>
    /// <param name="link">URL of the YouTube video.</param>
    public static void SaveVideoToDisk(string link) {
        var youTube = YouTube.Default; // starting point for YouTube actions
        var video = youTube.GetVideo(link); // gets a Video object with info about the video
        File.WriteAllBytes("hello.mp4", video.GetBytes());
    }

    /// <summary>
    /// Creates (or truncates) a file and immediately releases the handle.
    /// </summary>
    /// <param name="filename">Path of the file to create.</param>
    public static void CreateEmptyFile(string filename) {
        File.Create(filename).Dispose();
    }
}
}
Loading

0 comments on commit dd2b809

Please sign in to comment.