From 0d5bccd3a87f88388d581a7c4dc594d5451c2fd9 Mon Sep 17 00:00:00 2001 From: Albert Davletov Date: Mon, 8 Feb 2021 16:11:02 -0800 Subject: [PATCH] Text Analytics updates (#125) * Text Analytics updates * fix type --- Kiosk/IntelligentKioskSample.csproj | 2 +- Kiosk/ServiceHelpers/ServiceHelpers.csproj | 2 +- Kiosk/ServiceHelpers/TextAnalyticsHelper.cs | 56 +++++--- Kiosk/Views/BingNewsAnalytics.xaml.cs | 11 +- .../TextAnalyticsExplorer.xaml.cs | 130 ++++++++++-------- 5 files changed, 112 insertions(+), 89 deletions(-) diff --git a/Kiosk/IntelligentKioskSample.csproj b/Kiosk/IntelligentKioskSample.csproj index 1653d2c..e0ff380 100644 --- a/Kiosk/IntelligentKioskSample.csproj +++ b/Kiosk/IntelligentKioskSample.csproj @@ -882,7 +882,7 @@ - 5.1.0-beta.2 + 5.1.0-beta.3 6.0.0 diff --git a/Kiosk/ServiceHelpers/ServiceHelpers.csproj b/Kiosk/ServiceHelpers/ServiceHelpers.csproj index 1949570..dcd562e 100644 --- a/Kiosk/ServiceHelpers/ServiceHelpers.csproj +++ b/Kiosk/ServiceHelpers/ServiceHelpers.csproj @@ -139,7 +139,7 @@ - 5.1.0-beta.2 + 5.1.0-beta.3 6.0.0 diff --git a/Kiosk/ServiceHelpers/TextAnalyticsHelper.cs b/Kiosk/ServiceHelpers/TextAnalyticsHelper.cs index 992f779..d08d8c5 100644 --- a/Kiosk/ServiceHelpers/TextAnalyticsHelper.cs +++ b/Kiosk/ServiceHelpers/TextAnalyticsHelper.cs @@ -34,15 +34,27 @@ using Azure; using Azure.AI.TextAnalytics; using System; +using System.Collections.Generic; using System.Threading.Tasks; namespace ServiceHelpers { public static class TextAnalyticsHelper { - // NOTE 10/19/2020: Text Analytics API v3 is not available in the following regions: China North 2, China East. - // https://docs.microsoft.com/en-us/azure/cognitive-services/text-analytics/migration-guide?tabs=sentiment-analysis - public static readonly string[] NotAvailableAzureRegions = new string[] { "chinanorth2", "chinaeast" }; + // NOTE 12/17/2020: Text Analytics API v3 language support + // See details: https://docs.microsoft.com/en-us/azure/cognitive-services/text-analytics/language-support + public static readonly string DefaultLanguageCode = "en"; + public static readonly string[] SentimentAnalysisSupportedLanguages = { "zh", "zh-hans", "zh-hant", "en", "fr", "de", "hi", "it", "ja", "ko", "no", "pt", "pt-BR", "pt-PT", "es", "tr" }; + public static readonly string[] OpinionMiningSupportedLanguages = { "en" }; + public static readonly string[] KeyPhraseExtractionSupportedLanguages = { "da", "nl", "en", "fi", "fr", "de", "it", "ja", "ko", "no", "nb", "pl", "pt", "pt-BR", "pt-PT", "ru", "es", "sv" }; + public static readonly string[] NamedEntitySupportedLanguages = { "ar", "zh", "zh-hans", "zh-hant", "cs", "da", "nl", "en", "fi", "fr", "de", "he", "hu", "it", "ja", "ko", "no", "nb", "pl", "pt", "pt-BR", "pt-PT", "ru", "es", "sv", "tr" }; + public static readonly string[] EntityLinkingSupportedLanguages = { "en", "es" }; + public static readonly Uri LanguageSupportUri = new Uri("https://docs.microsoft.com/en-us/azure/cognitive-services/text-analytics/language-support"); + public static readonly Dictionary LanguageCodeMap = new Dictionary() + { + { "zh_chs", "zh-hans" }, + { "zh_cht", "zh-hant" } + }; // Note: Data limits // See details: https://docs.microsoft.com/en-us/azure/cognitive-services/text-analytics/concepts/data-limits?tabs=version-3#data-limits @@ -97,41 +109,51 @@ private static void InitializeTextAnalyticsService() client = credentials != null && endpoint != null ? new TextAnalyticsClient(endpoint, credentials) : null; } - public static async Task AnalyzeSentimentAsync(string input, string language = "en", AdditionalSentimentAnalyses sentimentAnalyses = AdditionalSentimentAnalyses.None) + public static async Task DetectLanguageAsync(string input) + { + return await client.DetectLanguageAsync(input); + } + + public static async Task AnalyzeSentimentAsync(string input, string language = null, bool includeOpinionMining = false) { - var options = new AnalyzeSentimentOptions() { AdditionalSentimentAnalyses = sentimentAnalyses }; + var options = new AnalyzeSentimentOptions() { IncludeOpinionMining = includeOpinionMining }; return await client.AnalyzeSentimentAsync(input, language, options); } - public static async Task AnalyzeSentimentAsync(string[] input, string language = "en", AdditionalSentimentAnalyses sentimentAnalyses = AdditionalSentimentAnalyses.None) + public static async Task AnalyzeSentimentAsync(string[] input, string language = null, bool includeOpinionMining = false) { - var options = new AnalyzeSentimentOptions() { AdditionalSentimentAnalyses = sentimentAnalyses }; + var options = new AnalyzeSentimentOptions() { IncludeOpinionMining = includeOpinionMining }; return await client.AnalyzeSentimentBatchAsync(input, language, options); } - public static async Task DetectLanguageAsync(string input) + public static async Task ExtractKeyPhrasesAsync(string input, string language = null) { - return await client.DetectLanguageAsync(input); + return await client.ExtractKeyPhrasesAsync(input, language); } - public static async Task RecognizeEntitiesAsync(string input) + public static async Task ExtractKeyPhrasesAsync(string[] input, string language = null) { - return await client.RecognizeEntitiesAsync(input); + return await client.ExtractKeyPhrasesBatchAsync(input, language); } - public static async Task RecognizeLinkedEntitiesAsync(string input) + public static async Task RecognizeEntitiesAsync(string input, string language = null) { - return await client.RecognizeLinkedEntitiesAsync(input); + return await client.RecognizeEntitiesAsync(input, language); } - public static async Task ExtractKeyPhrasesAsync(string input, string language = "en") + public static async Task RecognizeLinkedEntitiesAsync(string input, string language = null) { - return await client.ExtractKeyPhrasesAsync(input, language); + return await client.RecognizeLinkedEntitiesAsync(input, language); } - public static async Task ExtractKeyPhrasesAsync(string[] input, string language = "en") + public static string GetLanguageCode(DetectedLanguage detectedLanguage) { - return await client.ExtractKeyPhrasesBatchAsync(input, language); + if (LanguageCodeMap.ContainsKey(detectedLanguage.Iso6391Name)) + { + return LanguageCodeMap[detectedLanguage.Iso6391Name]; + } + + return !string.IsNullOrEmpty(detectedLanguage.Iso6391Name) ? detectedLanguage.Iso6391Name : DefaultLanguageCode; } } } diff --git a/Kiosk/Views/BingNewsAnalytics.xaml.cs b/Kiosk/Views/BingNewsAnalytics.xaml.cs index d10265e..4349a32 100644 --- a/Kiosk/Views/BingNewsAnalytics.xaml.cs +++ b/Kiosk/Views/BingNewsAnalytics.xaml.cs @@ -82,16 +82,7 @@ protected override async void OnNavigatedTo(NavigationEventArgs e) } else { - bool isNotAvailableRegion = TextAnalyticsHelper.NotAvailableAzureRegions.Any(r => SettingsHelper.Instance.TextAnalyticsApiKeyEndpoint.Contains(r, StringComparison.OrdinalIgnoreCase)); - if (isNotAvailableRegion) - { - this.page.IsEnabled = false; - await new MessageDialog("Text Analytics API v3 is not available in the following regions: China North 2, China East. Please change your Text Analytics key and region in the Settings page to a supported region.", "API key not supported").ShowAsync(); - } - else - { - this.page.IsEnabled = true; - } + this.page.IsEnabled = true; } base.OnNavigatedTo(e); diff --git a/Kiosk/Views/TextAnalyticsExplorer/TextAnalyticsExplorer.xaml.cs b/Kiosk/Views/TextAnalyticsExplorer/TextAnalyticsExplorer.xaml.cs index 5727d39..53b6f57 100644 --- a/Kiosk/Views/TextAnalyticsExplorer/TextAnalyticsExplorer.xaml.cs +++ b/Kiosk/Views/TextAnalyticsExplorer/TextAnalyticsExplorer.xaml.cs @@ -56,13 +56,18 @@ namespace IntelligentKioskSample.Views.TextAnalyticsExplorer ExperienceType = ExperienceType.Guided | ExperienceType.Business, TechnologiesUsed = TechnologyType.TextAnalytics, TechnologyArea = TechnologyAreaType.Language, - DateAdded = "2020/09/17")] + DateAdded = "2020/09/17", + DateUpdated = "2021/02/04", + UpdatedDescription = "Now supporting more languages")] public sealed partial class TextAnalyticsExplorer : Page { private static readonly Color PositiveColor = Color.FromArgb(255, 137, 196, 2); // #89c402 private static readonly Color NeutralColor = Color.FromArgb(255, 0, 120, 212); // #0078d4 private static readonly Color NegativeColor = Color.FromArgb(255, 165, 20, 25); // #a51419 + private const string NotFound = "Not found"; + private const string LanguageNotSupported = "Not supported in this language"; + public ObservableCollection OpinionMiningCollection { get; set; } = new ObservableCollection(); public ObservableCollection SampleTextList { get; set; } = new ObservableCollection(); @@ -81,21 +86,12 @@ protected override async void OnNavigatedTo(NavigationEventArgs e) } else { - bool isNotAvailableRegion = TextAnalyticsHelper.NotAvailableAzureRegions.Any(r => SettingsHelper.Instance.TextAnalyticsApiKeyEndpoint.Contains(r, StringComparison.OrdinalIgnoreCase)); - if (isNotAvailableRegion) - { - this.mainPage.IsEnabled = false; - await new MessageDialog("Text Analytics API v3 is not available in the following regions: China North 2, China East. Please change your Text Analytics key and region in the Settings page to a supported region.", "API key not supported").ShowAsync(); - } - else + this.mainPage.IsEnabled = true; + SampleTextList.AddRange(TextAnalyticsDataLoader.GetTextSamples()); + if (SampleTextList.Any()) { - this.mainPage.IsEnabled = true; - SampleTextList.AddRange(TextAnalyticsDataLoader.GetTextSamples()); - if (SampleTextList.Any()) - { - this.sampleTextComboBox.SelectedIndex = 0; - await AnalyzeTextAsync(); - } + this.sampleTextComboBox.SelectedIndex = 0; + await AnalyzeTextAsync(); } } @@ -129,58 +125,71 @@ private async Task AnalyzeTextAsync() this.progressControl.IsActive = true; DisplayProcessingUI(); + // detect language string input = this.inputText.Text; - var detectedLanguageTask = TextAnalyticsHelper.DetectLanguageAsync(input); - var detectedKeyPhrasesTask = TextAnalyticsHelper.ExtractKeyPhrasesAsync(input); - var documentSentimentTask = TextAnalyticsHelper.AnalyzeSentimentAsync(input, sentimentAnalyses: AdditionalSentimentAnalyses.OpinionMining); - var namedEntitiesResponseTask = TextAnalyticsHelper.RecognizeEntitiesAsync(input); - var linkedEntitiesResponseTask = TextAnalyticsHelper.RecognizeLinkedEntitiesAsync(input); - - await Task.WhenAll(detectedLanguageTask, detectedKeyPhrasesTask, documentSentimentTask, namedEntitiesResponseTask, linkedEntitiesResponseTask); - var detectedLanguage = detectedLanguageTask.Result; - var detectedKeyPhrases = detectedKeyPhrasesTask.Result; - var documentSentiment = documentSentimentTask.Result; - var namedEntitiesResponse = namedEntitiesResponseTask.Result; - var linkedEntitiesResponse = linkedEntitiesResponseTask.Result; - - // detected language and key phrases - this.detectedLangTextBlock.Text = !string.IsNullOrEmpty(detectedLanguage.Name) ? $"{detectedLanguage.Name} (confidence: {(int)(detectedLanguage.ConfidenceScore * 100)}%)" : "Not found"; - this.detectedKeyPhrasesTextBlock.Text = detectedKeyPhrases.Any() ? string.Join(", ", detectedKeyPhrases) : "Not found"; - - // document sentiment - CreateSentimentChart(documentSentiment); - - // mined opinions - OpinionMiningCollection.Clear(); - var minedOpinions = documentSentiment?.Sentences.SelectMany(s => s.MinedOpinions); - if (minedOpinions != null && minedOpinions.Any()) - { - var minedOpinionList = minedOpinions.Select(om => new MinedOpinion() - { - Aspect = om.Aspect.Text, - Opinions = string.Join(", ", om.Opinions.Select(o => $"{o.Text} ({o.Sentiment.ToString("G")})")) - }); - OpinionMiningCollection.AddRange(minedOpinionList); - } + DetectedLanguage detectedLanguage = await TextAnalyticsHelper.DetectLanguageAsync(input); + string languageCode = TextAnalyticsHelper.GetLanguageCode(detectedLanguage); - // entities - if (namedEntitiesResponse.Any()) - { - this.namesEntitiesGridView.ItemsSource = namedEntitiesResponse.Select(x => new { x.Text, Category = $"[{x.Category}]" }); - } - else - { - this.namesEntitiesGridView.ItemsSource = new[] { new { Text = "No entities" } }; - } + // check supported languages + bool isOpinionMiningSupported = TextAnalyticsHelper.OpinionMiningSupportedLanguages.Any(l => string.Equals(l, languageCode, StringComparison.OrdinalIgnoreCase)); + bool isSentimentSupported = TextAnalyticsHelper.SentimentAnalysisSupportedLanguages.Any(l => string.Equals(l, languageCode, StringComparison.OrdinalIgnoreCase)); + bool isKeyPhraseSupported = TextAnalyticsHelper.KeyPhraseExtractionSupportedLanguages.Any(l => string.Equals(l, languageCode, StringComparison.OrdinalIgnoreCase)); + bool isNamedEntitySupported = TextAnalyticsHelper.NamedEntitySupportedLanguages.Any(l => string.Equals(l, languageCode, StringComparison.OrdinalIgnoreCase)); + bool isEntityLinkingSupported = TextAnalyticsHelper.EntityLinkingSupportedLanguages.Any(l => string.Equals(l, languageCode, StringComparison.OrdinalIgnoreCase)); + + // sentiment analysis, key phrase extraction, named entity recognition and entity linking + Task documentSentimentTask = isSentimentSupported ? TextAnalyticsHelper.AnalyzeSentimentAsync(input, languageCode, isOpinionMiningSupported) : Task.FromResult(null); + Task detectedKeyPhrasesTask = isKeyPhraseSupported ? TextAnalyticsHelper.ExtractKeyPhrasesAsync(input, languageCode) : Task.FromResult(null); + Task namedEntitiesResponseTask = isNamedEntitySupported ? TextAnalyticsHelper.RecognizeEntitiesAsync(input, languageCode) : Task.FromResult(null); + Task linkedEntitiesResponseTask = isEntityLinkingSupported ? TextAnalyticsHelper.RecognizeLinkedEntitiesAsync(input, languageCode) : Task.FromResult(null); + + await Task.WhenAll(documentSentimentTask, detectedKeyPhrasesTask, namedEntitiesResponseTask, linkedEntitiesResponseTask); + + DocumentSentiment documentSentiment = documentSentimentTask.Result; + KeyPhraseCollection detectedKeyPhrases = detectedKeyPhrasesTask.Result; + CategorizedEntityCollection namedEntitiesResponse = namedEntitiesResponseTask.Result; + LinkedEntityCollection linkedEntitiesResponse = linkedEntitiesResponseTask.Result; + + // display results + this.detectedLangTextBlock.Text = !string.IsNullOrEmpty(detectedLanguage.Name) ? $"{detectedLanguage.Name} (confidence: {(int)(detectedLanguage.ConfidenceScore * 100)}%)" : NotFound; - // linked entities - if (linkedEntitiesResponse.Any()) + this.detectedKeyPhrasesTextBlock.Text = detectedKeyPhrases != null && detectedKeyPhrases.Any() + ? string.Join(", ", detectedKeyPhrases) + : isKeyPhraseSupported ? NotFound : LanguageNotSupported; + + this.namesEntitiesGridView.ItemsSource = namedEntitiesResponse != null && namedEntitiesResponse.Any() + ? namedEntitiesResponse.Select(x => new { x.Text, Category = $"[{x.Category}]" }) + : new[] { new { Text = isNamedEntitySupported ? "No entities" : LanguageNotSupported, Category = "" } }; + + this.linkedEntitiesGridView.ItemsSource = linkedEntitiesResponse != null && linkedEntitiesResponse.Any() + ? linkedEntitiesResponse.Select(x => new { Name = $"{x.Name} ({x.DataSource})", x.Url }) + : new[] { + isEntityLinkingSupported + ? new { Name = "No linked entities", Url = new Uri("about:blank") } + : new { Name = LanguageNotSupported, Url = TextAnalyticsHelper.LanguageSupportUri } + }; + + if (isSentimentSupported) { - this.linkedEntitiesGridView.ItemsSource = linkedEntitiesResponse.Select(x => new { Name = $"{x.Name} ({x.DataSource})", x.Url }); + CreateSentimentChart(documentSentiment); + + // mined opinions + OpinionMiningCollection.Clear(); + var minedOpinions = documentSentiment?.Sentences.SelectMany(s => s.MinedOpinions); + if (minedOpinions != null && minedOpinions.Any()) + { + var minedOpinionList = minedOpinions.Select(om => new MinedOpinion() + { + Aspect = om.Aspect.Text, + Opinions = string.Join(", ", om.Opinions.Select(o => $"{o.Text} ({o.Sentiment.ToString("G")})")) + }); + OpinionMiningCollection.AddRange(minedOpinionList); + } } else { - this.linkedEntitiesGridView.ItemsSource = new[] { new { Name = "No linked entities" } }; + this.sentimentTextBlock.Text = LanguageNotSupported; + this.sentimentChart.Visibility = Visibility.Collapsed; } // prepare json result @@ -239,6 +248,7 @@ private void DisplayProcessingUI() this.detectedKeyPhrasesTextBlock.Text = label; this.sentimentChart.Visibility = Visibility.Collapsed; this.sentimentTextBlock.Text = label; + this.OpinionMiningCollection.Clear(); this.namesEntitiesGridView.ItemsSource = new[] { new { Text = label } }; this.linkedEntitiesGridView.ItemsSource = new[] { new { Name = label } }; }