From 1e0cd98af9283d2849706cab3c3c9a1fa4ab81c3 Mon Sep 17 00:00:00 2001 From: Aaron Stannard Date: Sun, 14 Apr 2019 11:48:42 -0500 Subject: [PATCH 1/8] upgraded Akka.HealthCheck and Akka.NET --- src/WebCrawler.Shared.DevOps/Config/crawler.DevOps.conf | 2 +- src/common.props | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/WebCrawler.Shared.DevOps/Config/crawler.DevOps.conf b/src/WebCrawler.Shared.DevOps/Config/crawler.DevOps.conf index ed0421a..739f365 100644 --- a/src/WebCrawler.Shared.DevOps/Config/crawler.DevOps.conf +++ b/src/WebCrawler.Shared.DevOps/Config/crawler.DevOps.conf @@ -35,7 +35,7 @@ akka.healthcheck{ readiness{ # Use the cluster readiness provider # BUG: cluster readiness provider not sending back expected replies - #provider = "Akka.Cluster.HealthCheck.ClusterReadinessProbeProvider, Akka.Cluster.HealthCheck" + #provider = "Akka.HealthCheck.Cluster.ClusterReadinessProbeProvider, Akka.HealthCheck.Cluster" transport = tcp # use a second socket for TCP readiness checks from K8s tcp.port = 11001 } diff --git a/src/common.props b/src/common.props index b3aacb8..2f2e626 100644 --- a/src/common.props +++ b/src/common.props @@ -14,9 +14,9 @@ 2.4.1 - 1.3.11 + 1.3.12 0.2.0 - 0.1.0 + 0.2.1 0.5.0 netcoreapp2.1 From 0a39e6bd97fc30f1601a190399d5b717285167d7 Mon Sep 17 00:00:00 2001 From: Aaron Stannard Date: Sun, 14 Apr 2019 11:58:17 -0500 Subject: [PATCH 2/8] upgraded to new package name --- src/WebCrawler.Shared.DevOps/WebCrawler.Shared.DevOps.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/WebCrawler.Shared.DevOps/WebCrawler.Shared.DevOps.csproj b/src/WebCrawler.Shared.DevOps/WebCrawler.Shared.DevOps.csproj index 141f4ae..a2c9962 100644 --- a/src/WebCrawler.Shared.DevOps/WebCrawler.Shared.DevOps.csproj +++ b/src/WebCrawler.Shared.DevOps/WebCrawler.Shared.DevOps.csproj @@ -15,7 +15,7 @@ - + From c40b0cd98cbde5bd82c0ca1d5ab2c20e4f265b8a Mon Sep 17 00:00:00 2001 From: Aaron Stannard Date: Sun, 14 Apr 2019 12:04:37 -0500 Subject: [PATCH 3/8] fixed healthcheck package in other projects --- src/WebCrawler.CrawlService/WebCrawler.CrawlService.csproj | 2 +- src/WebCrawler.TrackerService/WebCrawler.TrackerService.csproj | 2 +- src/WebCrawler.Web/WebCrawler.Web.csproj | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/WebCrawler.CrawlService/WebCrawler.CrawlService.csproj b/src/WebCrawler.CrawlService/WebCrawler.CrawlService.csproj index 262781d..8d009ad 100644 --- a/src/WebCrawler.CrawlService/WebCrawler.CrawlService.csproj +++ b/src/WebCrawler.CrawlService/WebCrawler.CrawlService.csproj @@ -14,7 +14,7 @@ - + diff --git a/src/WebCrawler.TrackerService/WebCrawler.TrackerService.csproj b/src/WebCrawler.TrackerService/WebCrawler.TrackerService.csproj index 2352da3..5e86294 100644 --- a/src/WebCrawler.TrackerService/WebCrawler.TrackerService.csproj +++ b/src/WebCrawler.TrackerService/WebCrawler.TrackerService.csproj @@ -14,7 +14,7 @@ - + diff --git a/src/WebCrawler.Web/WebCrawler.Web.csproj b/src/WebCrawler.Web/WebCrawler.Web.csproj index 5d03c34..6570262 100644 --- a/src/WebCrawler.Web/WebCrawler.Web.csproj +++ b/src/WebCrawler.Web/WebCrawler.Web.csproj @@ -12,7 +12,7 @@ - + From 19033d8a7c419f3b88bc31710ce5f762a510630b Mon Sep 17 00:00:00 2001 From: Aaron Stannard Date: Sun, 14 Apr 2019 12:09:24 -0500 Subject: [PATCH 4/8] fixed unit test issue --- src/WebCrawler.Shared.DevOps.Tests/ActorSystemStartupSpecs.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/WebCrawler.Shared.DevOps.Tests/ActorSystemStartupSpecs.cs b/src/WebCrawler.Shared.DevOps.Tests/ActorSystemStartupSpecs.cs index 6b6f444..b95cc0a 100644 --- a/src/WebCrawler.Shared.DevOps.Tests/ActorSystemStartupSpecs.cs +++ b/src/WebCrawler.Shared.DevOps.Tests/ActorSystemStartupSpecs.cs @@ -2,7 +2,7 @@ using System.Collections.Generic; using System.Text; using Akka.Actor; -using Akka.Cluster.HealthCheck; +using Akka.HealthCheck.Cluster; using Akka.HealthCheck; using Akka.HealthCheck.Liveness; using Akka.HealthCheck.Readiness; From 1b20b46a9ebfd4e8e8d032561c999a543db38177 Mon Sep 17 00:00:00 2001 From: Aaron Stannard Date: Sun, 14 Apr 2019 12:23:57 -0500 Subject: [PATCH 5/8] v0.2.5 release notes (#28) --- RELEASE_NOTES.md | 5 +++-- src/common.props | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index f2d2b46..3bb58f5 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,2 +1,3 @@ -#### 0.2.4 April 14 2019 #### -* Upgraded to [Petabridge.Cmd v0.5.0](https://cmd.petabridge.com/articles/RELEASE_NOTES.html#v050-march-05-2019) so we could take advantage of the `cluster tail` command. \ No newline at end of file +#### 0.2.5 April 14 2019 #### +* Upgraded to Akka.Cluster v1.3.12 +* Upgraded to [Akka.HealthCheck.Cluster v0.2.1](https://github.com/petabridge/akkadotnet-healthcheck/releases/tag/0.2.1) \ No newline at end of file diff --git a/src/common.props b/src/common.props index 2f2e626..fc11efd 100644 --- a/src/common.props +++ b/src/common.props @@ -2,8 +2,9 @@ Copyright © 2015-2019 Petabridge, LLC Petabridge - 0.2.4 - Upgraded to [Petabridge.Cmd v0.5.0](https://cmd.petabridge.com/articles/RELEASE_NOTES.html#v050-march-05-2019) so we could take advantage of the `cluster tail` command. + 0.2.5 + Upgraded to Akka.Cluster v1.3.12 +Upgraded to [Akka.HealthCheck.Cluster v0.2.1](https://github.com/petabridge/akkadotnet-healthcheck/releases/tag/0.2.1) From d754c666d4687a4931bbebfb05c5890d20225fdb Mon Sep 17 00:00:00 2001 From: Aaron Stannard Date: Sun, 14 Apr 2019 13:31:00 -0500 Subject: [PATCH 6/8] formatted code (#30) --- src/Lighthouse/LighthouseHostFactory.cs | 32 +++-- src/Lighthouse/LighthouseService.cs | 37 ++--- src/Lighthouse/Program.NetCore.cs | 18 +-- src/WebCrawler.CrawlService/CrawlerService.cs | 12 +- src/WebCrawler.CrawlService/Program.cs | 14 +- .../ActorSystemStartupSpecs.cs | 20 +-- .../Config/OpsConfigSpec.cs | 8 +- .../Config/OpsConfig.cs | 36 ++--- .../CrawlerBootstrapper.cs | 25 ++-- src/WebCrawler.Shared.IO/DownloadCommands.cs | 30 ++-- .../DownloadCoordinator.cs | 133 +++++++++--------- src/WebCrawler.Shared.IO/DownloadFlow.cs | 25 ++-- src/WebCrawler.Shared.IO/HttpClientFactory.cs | 12 +- .../Messages/CheckDocuments.cs | 38 +++-- .../Messages/ProcessDocuments.cs | 26 +++- src/WebCrawler.Shared.IO/ParseFlow.cs | 18 ++- .../Commands/V1/IStartJobV1.cs | 6 + .../Commands/V1/IStatusUpdateV1.cs | 6 + .../Commands/V1/ISubscribeToJobV1.cs | 8 +- .../Commands/V1/IUnsubscribeFromJobV1.cs | 6 + .../Commands/V1/JobStatusUpdate.cs | 31 ++-- src/WebCrawler.Shared/Commands/V1/StartJob.cs | 2 +- .../Commands/V1/SubscribeToJob.cs | 16 ++- .../Commands/V1/UnsubscribeFromJob.cs | 14 +- src/WebCrawler.Shared/Config/HoconLoader.cs | 13 +- .../State/CompletedDocument.cs | 2 +- src/WebCrawler.Shared/State/CrawlDocument.cs | 33 ++--- src/WebCrawler.Shared/State/CrawlJob.cs | 2 +- src/WebCrawler.Shared/State/CrawlJobStats.cs | 2 +- .../State/JobStatusMessage.cs | 2 +- src/WebCrawler.Shared/Util/Deadline.cs | 2 +- .../ActorSafeNameFromUri.cs | 2 +- .../Actors/ApiMaster.cs | 2 +- .../Actors/IO/CrawlMaster.cs | 104 +++++++------- .../Actors/Tracking/DownloadsMaster.cs | 2 +- .../Actors/Tracking/DownloadsTracker.cs | 26 ++-- src/WebCrawler.TrackerService/Program.cs | 2 +- .../State/CrawlStatus.cs | 22 +-- .../TrackerService.cs | 5 +- src/WebCrawler.Web/Actors/CommandProcessor.cs | 64 +++++---- src/WebCrawler.Web/Actors/SignalRActor.cs | 60 ++++---- src/WebCrawler.Web/Actors/SystemActors.cs | 13 +- src/WebCrawler.Web/AkkaStartupTasks.cs | 13 +- .../Controllers/HomeController.cs | 10 +- src/WebCrawler.Web/Hubs/CrawlHub.cs | 10 +- src/WebCrawler.Web/Hubs/CrawlHubHelper.cs | 34 +++-- src/WebCrawler.Web/Program.cs | 27 ++-- src/WebCrawler.Web/Startup.cs | 29 ++-- src/WebCrawler.Web/Views/Home/Index.cshtml | 11 +- src/WebCrawler.Web/Views/Shared/Error.cshtml | 3 +- .../Views/Shared/Error.cshtml.cs | 8 +- .../Views/Shared/_Layout.cshtml | 112 ++++++++------- .../Shared/_ValidationScriptsPartial.cshtml | 2 +- .../Views/Shared/_ViewImports.cshtml | 4 +- .../Views/Shared/_ViewStart.cshtml | 2 +- .../appsettings.Development.json | 16 +-- src/WebCrawler.Web/appsettings.json | 18 +-- src/WebCrawler.Web/bundleconfig.json | 40 +++--- src/WebCrawler.Web/package.json | 22 +-- 59 files changed, 724 insertions(+), 568 deletions(-) diff --git a/src/Lighthouse/LighthouseHostFactory.cs b/src/Lighthouse/LighthouseHostFactory.cs index 7ccbcd3..a9eab6a 100644 --- a/src/Lighthouse/LighthouseHostFactory.cs +++ b/src/Lighthouse/LighthouseHostFactory.cs @@ -1,17 +1,20 @@ -using System; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System; using System.IO; using System.Linq; -using System.Net; using Akka.Actor; -using Akka.Bootstrap.Docker; using Akka.Configuration; using WebCrawler.Shared.DevOps; -using ConfigurationException = Akka.Configuration.ConfigurationException; namespace Lighthouse { /// - /// Launcher for the Lighthouse + /// Launcher for the Lighthouse /// public static class LighthouseHostFactory { @@ -23,9 +26,7 @@ public static ActorSystem LaunchLighthouse() var lighthouseConfig = clusterConfig.GetConfig("lighthouse"); if (lighthouseConfig != null && string.IsNullOrEmpty(systemName)) - { systemName = lighthouseConfig.GetString("actorsystem", systemName); - } var ipAddress = clusterConfig.GetString("akka.remote.dot-netty.tcp.public-hostname"); var port = clusterConfig.GetInt("akka.remote.dot-netty.tcp.port"); @@ -36,7 +37,8 @@ public static ActorSystem LaunchLighthouse() * Sanity check */ Console.WriteLine($"[Lighthouse] ActorSystem: {systemName}; IP: {ipAddress}; PORT: {port}"); - Console.WriteLine("[Lighthouse] Performing pre-boot sanity check. Should be able to parse address [{0}]", selfAddress); + Console.WriteLine("[Lighthouse] Performing pre-boot sanity check. Should be able to parse address [{0}]", + selfAddress); selfAddress = new Address("akka.tcp", systemName, ipAddress.Trim(), port).ToString(); Console.WriteLine("[Lighthouse] Parse successful."); @@ -52,22 +54,24 @@ public static ActorSystem LaunchLighthouse() if (seeds.Count > 1) { - injectedClusterConfigString = seeds.Aggregate("akka.cluster.seed-nodes = [", (current, seed) => current + (@"""" + seed + @""", ")); + injectedClusterConfigString = seeds.Aggregate("akka.cluster.seed-nodes = [", + (current, seed) => current + @"""" + seed + @""", "); injectedClusterConfigString += "]"; } else { - injectedClusterConfigString = "akka.cluster.seed-nodes = [\""+ selfAddress +"\"]"; + injectedClusterConfigString = "akka.cluster.seed-nodes = [\"" + selfAddress + "\"]"; } } - - var finalConfig = injectedClusterConfigString != null ? injectedClusterConfigString - .WithFallback(clusterConfig) : clusterConfig; + var finalConfig = injectedClusterConfigString != null + ? injectedClusterConfigString + .WithFallback(clusterConfig) + : clusterConfig; return ActorSystem.Create(systemName, finalConfig) .StartPbm(); } } -} +} \ No newline at end of file diff --git a/src/Lighthouse/LighthouseService.cs b/src/Lighthouse/LighthouseService.cs index d89747c..490435d 100644 --- a/src/Lighthouse/LighthouseService.cs +++ b/src/Lighthouse/LighthouseService.cs @@ -1,22 +1,11 @@ -// Copyright 2014-2015 Aaron Stannard, Petabridge LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use -// this file except in compliance with the License. You may obtain a copy of the -// License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software distributed -// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- -using System; using System.Threading.Tasks; using Akka.Actor; -using Akka.Cluster; -using Petabridge.Cmd.Cluster; -using Petabridge.Cmd.Host; namespace Lighthouse { @@ -24,22 +13,22 @@ public class LighthouseService { private ActorSystem _lighthouseSystem; - public void Start() - { - _lighthouseSystem = LighthouseHostFactory.LaunchLighthouse(); - } - /// - /// Task completes once the Lighthouse has terminated. + /// Task completes once the Lighthouse has terminated. /// /// - /// Doesn't actually invoke termination. Need to call for that. + /// Doesn't actually invoke termination. Need to call for that. /// public Task TerminationHandle => _lighthouseSystem.WhenTerminated; + public void Start() + { + _lighthouseSystem = LighthouseHostFactory.LaunchLighthouse(); + } + public async Task StopAsync() { await CoordinatedShutdown.Get(_lighthouseSystem).Run(); } } -} +} \ No newline at end of file diff --git a/src/Lighthouse/Program.NetCore.cs b/src/Lighthouse/Program.NetCore.cs index a7f6f36..7f3850d 100644 --- a/src/Lighthouse/Program.NetCore.cs +++ b/src/Lighthouse/Program.NetCore.cs @@ -1,5 +1,10 @@ -using System; -using System.Diagnostics; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System; namespace Lighthouse { @@ -16,11 +21,8 @@ public static void Main(string[] args) await lighthouseService.StopAsync(); }; - Console.CancelKeyPress += async (sender, eventArgs) => - { - await lighthouseService.StopAsync(); - }; - lighthouseService.TerminationHandle.Wait(); + Console.CancelKeyPress += async (sender, eventArgs) => { await lighthouseService.StopAsync(); }; + lighthouseService.TerminationHandle.Wait(); } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.CrawlService/CrawlerService.cs b/src/WebCrawler.CrawlService/CrawlerService.cs index 1fac33a..1d80193 100644 --- a/src/WebCrawler.CrawlService/CrawlerService.cs +++ b/src/WebCrawler.CrawlService/CrawlerService.cs @@ -1,9 +1,11 @@ -using System; -using System.Collections.Generic; -using System.Text; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + using System.Threading.Tasks; using Akka.Actor; -using Akka.Bootstrap.Docker; using WebCrawler.Shared.Config; using WebCrawler.Shared.DevOps; @@ -28,4 +30,4 @@ public async Task Stop() await CoordinatedShutdown.Get(ClusterSystem).Run(CoordinatedShutdown.ClrExitReason.Instance); } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.CrawlService/Program.cs b/src/WebCrawler.CrawlService/Program.cs index 6b9c73f..f65211b 100644 --- a/src/WebCrawler.CrawlService/Program.cs +++ b/src/WebCrawler.CrawlService/Program.cs @@ -1,10 +1,16 @@ -using System; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System; namespace WebCrawler.CrawlService { - class Program + internal class Program { - static void Main(string[] args) + private static void Main(string[] args) { var crawlerService = new CrawlerService(); crawlerService.Start(); @@ -23,4 +29,4 @@ static void Main(string[] args) crawlerService.WhenTerminated.Wait(); } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Shared.DevOps.Tests/ActorSystemStartupSpecs.cs b/src/WebCrawler.Shared.DevOps.Tests/ActorSystemStartupSpecs.cs index b95cc0a..ba8c9d9 100644 --- a/src/WebCrawler.Shared.DevOps.Tests/ActorSystemStartupSpecs.cs +++ b/src/WebCrawler.Shared.DevOps.Tests/ActorSystemStartupSpecs.cs @@ -1,13 +1,13 @@ -using System; -using System.Collections.Generic; -using System.Text; -using Akka.Actor; -using Akka.HealthCheck.Cluster; -using Akka.HealthCheck; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System; using Akka.HealthCheck.Liveness; using Akka.HealthCheck.Readiness; using Akka.TestKit.Xunit2; -using FluentAssertions; using Xunit; using Xunit.Abstractions; @@ -16,7 +16,9 @@ namespace WebCrawler.Shared.DevOps.Tests public class ActorSystemStartupSpecs : TestKit { public ActorSystemStartupSpecs(ITestOutputHelper helper) - : base(Akka.Configuration.Config.Empty.ApplyOpsConfig(), output: helper) { } + : base(Akka.Configuration.Config.Empty.ApplyOpsConfig(), output: helper) + { + } [Fact(DisplayName = "Instrumented ActorSystem should start HealthChecks automatically")] public void ActorSystem_should_start_HealthChecks_automatically() @@ -46,4 +48,4 @@ public void ActorSystem_should_start_HealthChecks_automatically() //AkkaHealthCheck.For(Sys).ReadinessProvider.Should().BeOfType(); } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Shared.DevOps.Tests/Config/OpsConfigSpec.cs b/src/WebCrawler.Shared.DevOps.Tests/Config/OpsConfigSpec.cs index 31899e0..0428530 100644 --- a/src/WebCrawler.Shared.DevOps.Tests/Config/OpsConfigSpec.cs +++ b/src/WebCrawler.Shared.DevOps.Tests/Config/OpsConfigSpec.cs @@ -1,3 +1,9 @@ +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + using FluentAssertions; using WebCrawler.Shared.DevOps.Config; using Xunit; @@ -13,4 +19,4 @@ public void Should_load_default_OpsConfig() config.Should().NotBeNull(); } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Shared.DevOps/Config/OpsConfig.cs b/src/WebCrawler.Shared.DevOps/Config/OpsConfig.cs index a772246..c4b02e8 100644 --- a/src/WebCrawler.Shared.DevOps/Config/OpsConfig.cs +++ b/src/WebCrawler.Shared.DevOps/Config/OpsConfig.cs @@ -1,29 +1,33 @@ -using System; -using System.Collections.Generic; -using System.Text; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System; using Akka.Configuration; namespace WebCrawler.Shared.DevOps.Config { /// - /// Holder for shared configuration data used by all WebCrawler services + /// Holder for shared configuration data used by all WebCrawler services /// public class OpsConfig { /// - /// Name of the variable used to look for Phobos + /// Name of the variable used to look for Phobos /// public const string PHOBOS_ENABLED = "PHOBOS_ENABLED"; /// - /// Name of the variable used to direct Phobos' StatsD - /// output. + /// Name of the variable used to direct Phobos' StatsD + /// output. /// public const string STATSD_URL = "STATSD_URL"; /// - /// Name of the variable used to direct Phobos' StatsD - /// output. + /// Name of the variable used to direct Phobos' StatsD + /// output. /// public const string STATSD_PORT = "STATSD_PORT"; @@ -34,17 +38,17 @@ public static Akka.Configuration.Config GetOpsConfig() public static Akka.Configuration.Config GetPhobosConfig() { - - var rawPhobosConfig = ConfigurationFactory.FromResource("WebCrawler.Shared.DevOps.Config.crawler.Phobos.conf"); + var rawPhobosConfig = + ConfigurationFactory.FromResource("WebCrawler.Shared.DevOps.Config.crawler.Phobos.conf"); var statsdUrl = Environment.GetEnvironmentVariable(STATSD_URL); var statsDPort = Environment.GetEnvironmentVariable(STATSD_PORT); if (!string.IsNullOrEmpty(statsdUrl) && int.TryParse(statsDPort, out var portNum)) - { - return ConfigurationFactory.ParseString($"phobos.monitoring.statsd.endpoint=\"{statsdUrl}\"" + Environment.NewLine + - $"phobos.monitoring.statsd.port={portNum}").WithFallback(rawPhobosConfig); - } + return ConfigurationFactory.ParseString($"phobos.monitoring.statsd.endpoint=\"{statsdUrl}\"" + + Environment.NewLine + + $"phobos.monitoring.statsd.port={portNum}") + .WithFallback(rawPhobosConfig); return rawPhobosConfig; } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Shared.DevOps/CrawlerBootstrapper.cs b/src/WebCrawler.Shared.DevOps/CrawlerBootstrapper.cs index 71f8d59..a13825d 100644 --- a/src/WebCrawler.Shared.DevOps/CrawlerBootstrapper.cs +++ b/src/WebCrawler.Shared.DevOps/CrawlerBootstrapper.cs @@ -1,4 +1,10 @@ -using System; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System; using Akka.Actor; using Akka.Bootstrap.Docker; using Petabridge.Cmd.Cluster; @@ -8,8 +14,8 @@ namespace WebCrawler.Shared.DevOps { /// - /// Used to help inject and standardize all of the different components - /// needed to run all of the crawler services in production. + /// Used to help inject and standardize all of the different components + /// needed to run all of the crawler services in production. /// public static class CrawlerBootstrapper { @@ -21,20 +27,19 @@ public static Akka.Configuration.Config ApplyOpsConfig(this Akka.Configuration.C public static Akka.Configuration.Config ApplyPhobosConfig(this Akka.Configuration.Config previousConfig) { - var enabledPhobosStr = Environment.GetEnvironmentVariable(OpsConfig.PHOBOS_ENABLED)?.Trim().ToLowerInvariant() ?? "false"; + var enabledPhobosStr = + Environment.GetEnvironmentVariable(OpsConfig.PHOBOS_ENABLED)?.Trim().ToLowerInvariant() ?? "false"; if (bool.TryParse(enabledPhobosStr, out var enabledPhobos) && enabledPhobos) - { return OpsConfig.GetPhobosConfig().WithFallback(previousConfig); - } return previousConfig; } /// - /// Start Petabridge.Cmd + /// Start Petabridge.Cmd /// - /// The that will run Petabridge.Cmd - /// The same + /// The that will run Petabridge.Cmd + /// The same public static ActorSystem StartPbm(this ActorSystem system) { var pbm = PetabridgeCmd.Get(system); @@ -43,4 +48,4 @@ public static ActorSystem StartPbm(this ActorSystem system) return system; } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Shared.IO/DownloadCommands.cs b/src/WebCrawler.Shared.IO/DownloadCommands.cs index 05873cb..13d4dff 100644 --- a/src/WebCrawler.Shared.IO/DownloadCommands.cs +++ b/src/WebCrawler.Shared.IO/DownloadCommands.cs @@ -1,6 +1,10 @@ -using System; -using System.Collections.Generic; -using System.Linq; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System; using System.Net; using WebCrawler.Shared.State; @@ -13,7 +17,7 @@ public interface IDownloadResult } /// - /// Results form a operation + /// Results form a operation /// public class DownloadHtmlResult : IDownloadResult { @@ -50,8 +54,8 @@ public override bool Equals(object obj) { if (ReferenceEquals(null, obj)) return false; if (ReferenceEquals(this, obj)) return true; - if (obj.GetType() != this.GetType()) return false; - return Equals((DownloadImage)obj); + if (obj.GetType() != GetType()) return false; + return Equals((DownloadImage) obj); } public override int GetHashCode() @@ -61,7 +65,7 @@ public override int GetHashCode() } /// - /// Results from a operation + /// Results from a operation /// public class DownloadImageResult : IDownloadResult { @@ -72,11 +76,11 @@ public DownloadImageResult(DownloadImage command, byte[] bytes, HttpStatusCode s Command = command; } - public IDownloadDocument Command { get; private set; } + public byte[] Bytes { get; } - public byte[] Bytes { get; private set; } + public IDownloadDocument Command { get; } - public HttpStatusCode Status { get; private set; } + public HttpStatusCode Status { get; } } public class DownloadHtmlDocument : IDownloadDocument, IEquatable @@ -99,8 +103,8 @@ public override bool Equals(object obj) { if (ReferenceEquals(null, obj)) return false; if (ReferenceEquals(this, obj)) return true; - if (obj.GetType() != this.GetType()) return false; - return Equals((DownloadHtmlDocument)obj); + if (obj.GetType() != GetType()) return false; + return Equals((DownloadHtmlDocument) obj); } public override int GetHashCode() @@ -113,4 +117,4 @@ public interface IDownloadDocument { CrawlDocument Document { get; } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Shared.IO/DownloadCoordinator.cs b/src/WebCrawler.Shared.IO/DownloadCoordinator.cs index 48199c1..6d77f1b 100644 --- a/src/WebCrawler.Shared.IO/DownloadCoordinator.cs +++ b/src/WebCrawler.Shared.IO/DownloadCoordinator.cs @@ -1,77 +1,45 @@ -using System; -using System.Collections.Generic; -using System.Net.Http; -using Akka; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System; using Akka.Actor; using Akka.Event; -using Akka.Routing; using Akka.Streams; -using Akka.Streams.Actors; using Akka.Streams.Dsl; -using Akka.Streams.Implementation; using WebCrawler.Shared.IO.Messages; using WebCrawler.Shared.State; namespace WebCrawler.Shared.IO { /// - /// Actor responsible for using Akka.Streams to execute download and parsing of all content. - /// - /// Can be remote-deployed to other systems. - /// - /// Publishes statistics updates to its parent. + /// Actor responsible for using Akka.Streams to execute download and parsing of all content. + /// Can be remote-deployed to other systems. + /// Publishes statistics updates to its parent. /// public class DownloadCoordinator : ReceiveActor { - #region Constants - - public const string Downloader = "downloader"; - public const string Parser = "parser"; - - #endregion - - #region Messages - - /// - /// Used to signal that it's time to publish to the JobMaster - /// - public class PublishStatsTick - { - private PublishStatsTick() { } - private static readonly PublishStatsTick _instance = new PublishStatsTick(); - - public static PublishStatsTick Instance - { - get { return _instance; } - } - } + private const int DefaultMaxConcurrentDownloads = 50; + protected readonly IActorRef Commander; + protected readonly IActorRef DownloadsTracker; - public class StreamCompleteTick - { - private StreamCompleteTick() { } - public static readonly StreamCompleteTick Instance = new StreamCompleteTick(); - } + protected readonly long MaxConcurrentDownloads; - #endregion + private readonly ILoggingAdapter _logger = Context.GetLogger(); - const int DefaultMaxConcurrentDownloads = 50; - protected readonly IActorRef DownloadsTracker; - protected readonly IActorRef Commander; + private ICancelable _publishStatsTask; protected IActorRef DownloaderRouter; - protected IActorRef ParserRouter; - protected IActorRef SourceActor; protected CrawlJob Job; + protected IActorRef ParserRouter; + protected IActorRef SourceActor; protected CrawlJobStats Stats; - protected readonly long MaxConcurrentDownloads; - - private ICancelable _publishStatsTask; - - private ILoggingAdapter _logger = Context.GetLogger(); - - public DownloadCoordinator(CrawlJob job, IActorRef commander, IActorRef downloadsTracker, long maxConcurrentDownloads) + public DownloadCoordinator(CrawlJob job, IActorRef commander, IActorRef downloadsTracker, + long maxConcurrentDownloads) { Job = job; DownloadsTracker = downloadsTracker; @@ -93,7 +61,7 @@ public DownloadCoordinator(CrawlJob job, IActorRef commander, IActorRef download var source = Source.ActorRef(5000, OverflowStrategy.DropTail); - var graph = GraphDsl.Create(source, (builder, s) => + var graph = GraphDsl.Create(source, (builder, s) => { // html flows var downloadHtmlFlow = builder.Add(htmlFlow); @@ -131,7 +99,8 @@ protected override void PreStart() { // Schedule regular stats updates _publishStatsTask = new Cancelable(Context.System.Scheduler); - Context.System.Scheduler.ScheduleTellRepeatedly(TimeSpan.FromMilliseconds(250), TimeSpan.FromMilliseconds(250), Self, PublishStatsTick.Instance, Self, _publishStatsTask); + Context.System.Scheduler.ScheduleTellRepeatedly(TimeSpan.FromMilliseconds(250), + TimeSpan.FromMilliseconds(250), Self, PublishStatsTick.Instance, Self, _publishStatsTask); } protected override void PreRestart(Exception reason, object message) @@ -147,7 +116,9 @@ protected override void PostStop() //cancel the regularly scheduled task _publishStatsTask.Cancel(); } - catch { } + catch + { + } } private void Receiving() @@ -173,37 +144,59 @@ private void Receiving() }); //Update our local stats - Receive(discovered => - { - Stats = Stats.WithDiscovered(discovered); - }); + Receive(discovered => { Stats = Stats.WithDiscovered(discovered); }); //Received word from the DownloadTracker that we need to process some docs Receive(process => { foreach (var doc in process.Documents) - { SourceActor.Tell(doc); - } }); //hand the work off to the downloaders - Receive(download => - { - SourceActor.Tell(download.Document); - }); + Receive(download => { SourceActor.Tell(download.Document); }); Receive(completed => { - _logger.Info("Logging completed download {0} bytes {1}", completed.Document.DocumentUri,completed.NumBytes); + _logger.Info("Logging completed download {0} bytes {1}", completed.Document.DocumentUri, + completed.NumBytes); Stats = Stats.WithCompleted(completed); _logger.Info("Total stats {0}", Stats); }); - Receive(_ => + Receive(_ => { _logger.Info("Stream has completed. No more messages to process."); }); + } + + #region Constants + + public const string Downloader = "downloader"; + public const string Parser = "parser"; + + #endregion + + #region Messages + + /// + /// Used to signal that it's time to publish to the JobMaster + /// + public class PublishStatsTick + { + private PublishStatsTick() { - _logger.Info("Stream has completed. No more messages to process."); - }); + } + + public static PublishStatsTick Instance { get; } = new PublishStatsTick(); + } + + public class StreamCompleteTick + { + public static readonly StreamCompleteTick Instance = new StreamCompleteTick(); + + private StreamCompleteTick() + { + } } + + #endregion } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Shared.IO/DownloadFlow.cs b/src/WebCrawler.Shared.IO/DownloadFlow.cs index 4e6638a..3e61fae 100644 --- a/src/WebCrawler.Shared.IO/DownloadFlow.cs +++ b/src/WebCrawler.Shared.IO/DownloadFlow.cs @@ -1,16 +1,17 @@ -using System; -using System.Collections.Generic; -using System.Linq; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System; using System.Net; using System.Net.Http; -using System.Text; using System.Threading.Tasks; using Akka; using Akka.Actor; -using Akka.Streams.Actors; using Akka.Streams.Dsl; using Akka.Util.Internal; -using WebCrawler.Shared.IO.Messages; using WebCrawler.Shared.State; namespace WebCrawler.Shared.IO @@ -20,10 +21,11 @@ public static class DownloadFlow public static Flow SelectDocType() { return Flow.Create() - .Select(x => (x.IsImage ? new DownloadImage(x) : (IDownloadDocument)new DownloadHtmlDocument(x))); + .Select(x => x.IsImage ? new DownloadImage(x) : (IDownloadDocument) new DownloadHtmlDocument(x)); } - public static Flow ProcessHtmlDownloadFor(int degreeOfParallelism, HttpClient client) + public static Flow ProcessHtmlDownloadFor( + int degreeOfParallelism, HttpClient client) { return Flow.Create() .Where(x => x is DownloadHtmlDocument) @@ -55,7 +57,8 @@ public static Flow ProcessComple return Flow.Create() .Select( x => - new CompletedDocument(x.Command.AsInstanceOf().Document, x.Content.Length*2, + new CompletedDocument(x.Command.AsInstanceOf().Document, + x.Content.Length * 2, ActorRefs.NoSender)); } @@ -64,7 +67,7 @@ public static Flow ProcessImage { return Flow.Create() .Where(x => x is DownloadImage) - .Select(x => (DownloadImage)x) + .Select(x => (DownloadImage) x) .SelectAsyncUnordered(degreeOfParallelism, document => client.GetByteArrayAsync(document.Document.DocumentUri) @@ -96,4 +99,4 @@ private static Func, DownloadImageResult> DownloadImageContinuation }; } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Shared.IO/HttpClientFactory.cs b/src/WebCrawler.Shared.IO/HttpClientFactory.cs index ad6ab6a..84a139d 100644 --- a/src/WebCrawler.Shared.IO/HttpClientFactory.cs +++ b/src/WebCrawler.Shared.IO/HttpClientFactory.cs @@ -1,9 +1,15 @@ -using System.Net.Http; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System.Net.Http; namespace WebCrawler.Shared.IO { /// - /// Factory class for creating instances + /// Factory class for creating instances /// public static class HttpClientFactory { @@ -12,4 +18,4 @@ public static HttpClient GetClient() return new HttpClient(); } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Shared.IO/Messages/CheckDocuments.cs b/src/WebCrawler.Shared.IO/Messages/CheckDocuments.cs index 53cf9b9..324d949 100644 --- a/src/WebCrawler.Shared.IO/Messages/CheckDocuments.cs +++ b/src/WebCrawler.Shared.IO/Messages/CheckDocuments.cs @@ -1,4 +1,10 @@ -using System; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System; using System.Collections.Generic; using System.Linq; using Akka.Actor; @@ -7,9 +13,9 @@ namespace WebCrawler.Shared.IO.Messages { /// - /// Message class used to check to see if any of the listed - /// s have are currently being processed - /// or have previously been processed. + /// Message class used to check to see if any of the listed + /// s have are currently being processed + /// or have previously been processed. /// public class CheckDocuments { @@ -20,21 +26,27 @@ public CheckDocuments(IList documents, IActorRef requestor, TimeS Documents = documents; } - public IList Documents { get; private set; } + public IList Documents { get; } - public int HtmlDocs { get { return Documents.Count(x => !x.IsImage); } } + public int HtmlDocs + { + get { return Documents.Count(x => !x.IsImage); } + } - public int Images { get { return Documents.Count(x => x.IsImage); } } + public int Images + { + get { return Documents.Count(x => x.IsImage); } + } /// - /// Reference to the actor who should take on the cleared documents + /// Reference to the actor who should take on the cleared documents /// - public IActorRef Requestor { get; private set; } + public IActorRef Requestor { get; } /// - /// The amount of time we think it'll take to crawl this document - /// based on current workload. + /// The amount of time we think it'll take to crawl this document + /// based on current workload. /// - public TimeSpan? EstimatedCrawlTime { get; private set; } + public TimeSpan? EstimatedCrawlTime { get; } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Shared.IO/Messages/ProcessDocuments.cs b/src/WebCrawler.Shared.IO/Messages/ProcessDocuments.cs index 0e103a3..80169b6 100644 --- a/src/WebCrawler.Shared.IO/Messages/ProcessDocuments.cs +++ b/src/WebCrawler.Shared.IO/Messages/ProcessDocuments.cs @@ -1,4 +1,10 @@ -using System.Collections.Generic; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System.Collections.Generic; using System.Linq; using Akka.Actor; using WebCrawler.Shared.State; @@ -6,7 +12,7 @@ namespace WebCrawler.Shared.IO.Messages { /// - /// Message class used to confirm which documents are available for processing. + /// Message class used to confirm which documents are available for processing. /// public class ProcessDocuments { @@ -16,15 +22,21 @@ public ProcessDocuments(IList documents, IActorRef assigned) Documents = documents; } - public IList Documents { get; private set; } + public IList Documents { get; } - public int HtmlDocs { get { return Documents.Count(x => !x.IsImage); } } + public int HtmlDocs + { + get { return Documents.Count(x => !x.IsImage); } + } - public int Images { get { return Documents.Count(x => x.IsImage); } } + public int Images + { + get { return Documents.Count(x => x.IsImage); } + } /// - /// Reference to the actor who should take on the cleared documents + /// Reference to the actor who should take on the cleared documents /// - public IActorRef Assigned { get; private set; } + public IActorRef Assigned { get; } } } \ No newline at end of file diff --git a/src/WebCrawler.Shared.IO/ParseFlow.cs b/src/WebCrawler.Shared.IO/ParseFlow.cs index ed01ccc..2f5e2ea 100644 --- a/src/WebCrawler.Shared.IO/ParseFlow.cs +++ b/src/WebCrawler.Shared.IO/ParseFlow.cs @@ -1,4 +1,10 @@ -using System; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System; using System.Collections.Generic; using System.Linq; using Akka; @@ -17,7 +23,6 @@ public static Flow GetParseFlow(Cra return Flow.Create().Async() .Select(downloadHtmlResult => { - var requestedUrls = new List(); var htmlString = downloadHtmlResult.Content; @@ -56,7 +61,8 @@ public static Flow GetParseFlow(Cra requestedUrls = requestedUrls.Concat(validLinkUris).ToList(); } - return new CheckDocuments(requestedUrls, ActorRefs.NoSender, TimeSpan.FromMilliseconds(requestedUrls.Count*5000)); + return new CheckDocuments(requestedUrls, ActorRefs.NoSender, + TimeSpan.FromMilliseconds(requestedUrls.Count * 5000)); }); } @@ -83,7 +89,9 @@ public static bool AbsoluteUriIsInDomain(CrawlJob jobRoot, Uri otherUri) public static Uri ToAsboluteUri(CrawlJob jobRoot, string rawUri) { - return Uri.IsWellFormedUriString(rawUri, UriKind.Absolute) ? new Uri(rawUri, UriKind.Absolute) : new Uri(jobRoot.Root, rawUri); + return Uri.IsWellFormedUriString(rawUri, UriKind.Absolute) + ? new Uri(rawUri, UriKind.Absolute) + : new Uri(jobRoot.Root, rawUri); } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Shared/Commands/V1/IStartJobV1.cs b/src/WebCrawler.Shared/Commands/V1/IStartJobV1.cs index 54e9ded..6a21be1 100644 --- a/src/WebCrawler.Shared/Commands/V1/IStartJobV1.cs +++ b/src/WebCrawler.Shared/Commands/V1/IStartJobV1.cs @@ -1,3 +1,9 @@ +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + using Akka.Actor; using Akka.Routing; using WebCrawler.Shared.State; diff --git a/src/WebCrawler.Shared/Commands/V1/IStatusUpdateV1.cs b/src/WebCrawler.Shared/Commands/V1/IStatusUpdateV1.cs index 6310655..4ef2feb 100644 --- a/src/WebCrawler.Shared/Commands/V1/IStatusUpdateV1.cs +++ b/src/WebCrawler.Shared/Commands/V1/IStatusUpdateV1.cs @@ -1,3 +1,9 @@ +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + using System; using WebCrawler.Shared.State; diff --git a/src/WebCrawler.Shared/Commands/V1/ISubscribeToJobV1.cs b/src/WebCrawler.Shared/Commands/V1/ISubscribeToJobV1.cs index 0733e8c..27cdf35 100644 --- a/src/WebCrawler.Shared/Commands/V1/ISubscribeToJobV1.cs +++ b/src/WebCrawler.Shared/Commands/V1/ISubscribeToJobV1.cs @@ -1,4 +1,10 @@ -using Akka.Actor; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using Akka.Actor; using WebCrawler.Shared.State; namespace WebCrawler.Shared.Commands.V1 diff --git a/src/WebCrawler.Shared/Commands/V1/IUnsubscribeFromJobV1.cs b/src/WebCrawler.Shared/Commands/V1/IUnsubscribeFromJobV1.cs index e446d87..1b9b091 100644 --- a/src/WebCrawler.Shared/Commands/V1/IUnsubscribeFromJobV1.cs +++ b/src/WebCrawler.Shared/Commands/V1/IUnsubscribeFromJobV1.cs @@ -1,3 +1,9 @@ +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + using Akka.Actor; using WebCrawler.Shared.State; diff --git a/src/WebCrawler.Shared/Commands/V1/JobStatusUpdate.cs b/src/WebCrawler.Shared/Commands/V1/JobStatusUpdate.cs index 5bcdc40..3738b48 100644 --- a/src/WebCrawler.Shared/Commands/V1/JobStatusUpdate.cs +++ b/src/WebCrawler.Shared/Commands/V1/JobStatusUpdate.cs @@ -1,4 +1,10 @@ -using System; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System; using Newtonsoft.Json; using WebCrawler.Shared.State; @@ -20,7 +26,8 @@ public JobStatusUpdate(CrawlJob job) : this(job, null, JobStatus.Starting, DateT } [JsonConstructor] // need this to tell JSON.NET which constructor to pick - public JobStatusUpdate(CrawlJob job, CrawlJobStats stats, JobStatus status, DateTime startTime, DateTime? endTime) + public JobStatusUpdate(CrawlJob job, CrawlJobStats stats, JobStatus status, DateTime startTime, + DateTime? endTime) { Job = job; StartTime = startTime; @@ -29,23 +36,17 @@ public JobStatusUpdate(CrawlJob job, CrawlJobStats stats, JobStatus status, Date Stats = stats; } - public CrawlJob Job { get; private set; } + public CrawlJob Job { get; } - public CrawlJobStats Stats { get; private set; } + public CrawlJobStats Stats { get; } - public DateTime StartTime { get; private set; } + public DateTime StartTime { get; } - public DateTime? EndTime { get; private set; } + public DateTime? EndTime { get; } - public TimeSpan Elapsed - { - get - { - return ((EndTime.HasValue ? EndTime.Value : DateTime.UtcNow) - StartTime); - } - } + public TimeSpan Elapsed => (EndTime.HasValue ? EndTime.Value : DateTime.UtcNow) - StartTime; - public JobStatus Status { get; private set; } + public JobStatus Status { get; } public JobStatusUpdate WithStats(CrawlJobStats newStats) { @@ -62,4 +63,4 @@ public JobStatusUpdate WithStatus(JobStatus status) return new JobStatusUpdate(Job, Stats, status, StartTime, EndTime); } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Shared/Commands/V1/StartJob.cs b/src/WebCrawler.Shared/Commands/V1/StartJob.cs index 449417e..1d09381 100644 --- a/src/WebCrawler.Shared/Commands/V1/StartJob.cs +++ b/src/WebCrawler.Shared/Commands/V1/StartJob.cs @@ -1,6 +1,6 @@ // ----------------------------------------------------------------------- // -// Copyright (C) 2018 - 2018 Petabridge, LLC +// Copyright (C) 2015 - 2019 Petabridge, LLC // // ----------------------------------------------------------------------- diff --git a/src/WebCrawler.Shared/Commands/V1/SubscribeToJob.cs b/src/WebCrawler.Shared/Commands/V1/SubscribeToJob.cs index 8d91f9b..1ebbac7 100644 --- a/src/WebCrawler.Shared/Commands/V1/SubscribeToJob.cs +++ b/src/WebCrawler.Shared/Commands/V1/SubscribeToJob.cs @@ -1,10 +1,16 @@ -using Akka.Actor; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using Akka.Actor; using WebCrawler.Shared.State; namespace WebCrawler.Shared.Commands.V1 { /// - /// Subscribe an actor to a given + /// Subscribe an actor to a given /// public class SubscribeToJob : ISubscribeToJobV1 { @@ -14,8 +20,8 @@ public SubscribeToJob(CrawlJob job, IActorRef subscriber) Job = job; } - public CrawlJob Job { get; private set; } + public CrawlJob Job { get; } - public IActorRef Subscriber { get; private set; } + public IActorRef Subscriber { get; } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Shared/Commands/V1/UnsubscribeFromJob.cs b/src/WebCrawler.Shared/Commands/V1/UnsubscribeFromJob.cs index c53c76b..ac662a9 100644 --- a/src/WebCrawler.Shared/Commands/V1/UnsubscribeFromJob.cs +++ b/src/WebCrawler.Shared/Commands/V1/UnsubscribeFromJob.cs @@ -1,10 +1,16 @@ -using Akka.Actor; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using Akka.Actor; using WebCrawler.Shared.State; namespace WebCrawler.Shared.Commands.V1 { /// - /// Unsuscribe an actor from a given + /// Unsuscribe an actor from a given /// public class UnsubscribeFromJob : IUnsubscribeFromJobV1 { @@ -14,8 +20,8 @@ public UnsubscribeFromJob(CrawlJob job, IActorRef subscriber) Job = job; } - public CrawlJob Job { get; private set; } + public CrawlJob Job { get; } - public IActorRef Subscriber { get; private set; } + public IActorRef Subscriber { get; } } } \ No newline at end of file diff --git a/src/WebCrawler.Shared/Config/HoconLoader.cs b/src/WebCrawler.Shared/Config/HoconLoader.cs index 81e8da8..beb664a 100644 --- a/src/WebCrawler.Shared/Config/HoconLoader.cs +++ b/src/WebCrawler.Shared/Config/HoconLoader.cs @@ -1,13 +1,16 @@ -using System; -using System.Collections.Generic; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + using System.IO; -using System.Text; using Akka.Configuration; namespace WebCrawler.Shared.Config { /// - /// Used to load objects from stand-alone HOCON files. + /// Used to load objects from stand-alone HOCON files. /// public static class HoconLoader { @@ -16,4 +19,4 @@ public static Akka.Configuration.Config ParseConfig(string hoconPath) return ConfigurationFactory.ParseString(File.ReadAllText(hoconPath)); } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Shared/State/CompletedDocument.cs b/src/WebCrawler.Shared/State/CompletedDocument.cs index 96750d9..58f3770 100644 --- a/src/WebCrawler.Shared/State/CompletedDocument.cs +++ b/src/WebCrawler.Shared/State/CompletedDocument.cs @@ -1,6 +1,6 @@ // ----------------------------------------------------------------------- // -// Copyright (C) 2018 - 2018 Petabridge, LLC +// Copyright (C) 2015 - 2019 Petabridge, LLC // // ----------------------------------------------------------------------- diff --git a/src/WebCrawler.Shared/State/CrawlDocument.cs b/src/WebCrawler.Shared/State/CrawlDocument.cs index 0e2fb83..bced3ea 100644 --- a/src/WebCrawler.Shared/State/CrawlDocument.cs +++ b/src/WebCrawler.Shared/State/CrawlDocument.cs @@ -1,11 +1,17 @@ -using System; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System; using System.Collections.Generic; using System.Diagnostics; namespace WebCrawler.Shared.State { /// - /// Represents a single document, regardless of content type, discovered but not downloaded + /// Represents a single document, regardless of content type, discovered but not downloaded /// public class CrawlDocument : IEquatable { @@ -17,11 +23,13 @@ public CrawlDocument(Uri documentUri, bool isImage = false) } /// - /// Absolute URI of the document + /// Absolute URI of the document /// - public Uri DocumentUri { get; private set; } + public Uri DocumentUri { get; } + + public bool IsImage { get; } - public bool IsImage { get; private set; } + public static IEqualityComparer DocumentUriComparer { get; } = new DocumentUriEqualityComparer(); public bool Equals(CrawlDocument other) { @@ -34,13 +42,13 @@ public override bool Equals(object obj) { if (ReferenceEquals(null, obj)) return false; if (ReferenceEquals(this, obj)) return true; - if (obj.GetType() != this.GetType()) return false; + if (obj.GetType() != GetType()) return false; return Equals((CrawlDocument) obj); } public override int GetHashCode() { - return (DocumentUri != null ? DocumentUri.GetHashCode() : 0); + return DocumentUri != null ? DocumentUri.GetHashCode() : 0; } private sealed class DocumentUriEqualityComparer : IEqualityComparer @@ -56,15 +64,8 @@ public bool Equals(CrawlDocument x, CrawlDocument y) public int GetHashCode(CrawlDocument obj) { - return (obj.DocumentUri != null ? obj.DocumentUri.GetHashCode() : 0); + return obj.DocumentUri != null ? obj.DocumentUri.GetHashCode() : 0; } } - - private static readonly IEqualityComparer DocumentUriComparerInstance = new DocumentUriEqualityComparer(); - - public static IEqualityComparer DocumentUriComparer - { - get { return DocumentUriComparerInstance; } - } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Shared/State/CrawlJob.cs b/src/WebCrawler.Shared/State/CrawlJob.cs index 6338319..dc131ba 100644 --- a/src/WebCrawler.Shared/State/CrawlJob.cs +++ b/src/WebCrawler.Shared/State/CrawlJob.cs @@ -1,6 +1,6 @@ // ----------------------------------------------------------------------- // -// Copyright (C) 2018 - 2018 Petabridge, LLC +// Copyright (C) 2015 - 2019 Petabridge, LLC // // ----------------------------------------------------------------------- diff --git a/src/WebCrawler.Shared/State/CrawlJobStats.cs b/src/WebCrawler.Shared/State/CrawlJobStats.cs index a83756a..f373bbd 100644 --- a/src/WebCrawler.Shared/State/CrawlJobStats.cs +++ b/src/WebCrawler.Shared/State/CrawlJobStats.cs @@ -1,6 +1,6 @@ // ----------------------------------------------------------------------- // -// Copyright (C) 2018 - 2018 Petabridge, LLC +// Copyright (C) 2015 - 2019 Petabridge, LLC // // ----------------------------------------------------------------------- diff --git a/src/WebCrawler.Shared/State/JobStatusMessage.cs b/src/WebCrawler.Shared/State/JobStatusMessage.cs index fbe3fa8..88858a1 100644 --- a/src/WebCrawler.Shared/State/JobStatusMessage.cs +++ b/src/WebCrawler.Shared/State/JobStatusMessage.cs @@ -1,6 +1,6 @@ // ----------------------------------------------------------------------- // -// Copyright (C) 2018 - 2018 Petabridge, LLC +// Copyright (C) 2015 - 2019 Petabridge, LLC // // ----------------------------------------------------------------------- diff --git a/src/WebCrawler.Shared/Util/Deadline.cs b/src/WebCrawler.Shared/Util/Deadline.cs index afbb1d5..5f266a8 100644 --- a/src/WebCrawler.Shared/Util/Deadline.cs +++ b/src/WebCrawler.Shared/Util/Deadline.cs @@ -1,6 +1,6 @@ // ----------------------------------------------------------------------- // -// Copyright (C) 2018 - 2018 Petabridge, LLC +// Copyright (C) 2015 - 2019 Petabridge, LLC // // ----------------------------------------------------------------------- diff --git a/src/WebCrawler.TrackerService/ActorSafeNameFromUri.cs b/src/WebCrawler.TrackerService/ActorSafeNameFromUri.cs index 32ad980..a2c3dba 100644 --- a/src/WebCrawler.TrackerService/ActorSafeNameFromUri.cs +++ b/src/WebCrawler.TrackerService/ActorSafeNameFromUri.cs @@ -1,6 +1,6 @@ // ----------------------------------------------------------------------- // -// Copyright (C) 2018 - 2018 Petabridge, LLC +// Copyright (C) 2015 - 2019 Petabridge, LLC // // ----------------------------------------------------------------------- diff --git a/src/WebCrawler.TrackerService/Actors/ApiMaster.cs b/src/WebCrawler.TrackerService/Actors/ApiMaster.cs index cc03f3a..afa455c 100644 --- a/src/WebCrawler.TrackerService/Actors/ApiMaster.cs +++ b/src/WebCrawler.TrackerService/Actors/ApiMaster.cs @@ -1,6 +1,6 @@ // ----------------------------------------------------------------------- // -// Copyright (C) 2018 - 2018 Petabridge, LLC +// Copyright (C) 2015 - 2019 Petabridge, LLC // // ----------------------------------------------------------------------- diff --git a/src/WebCrawler.TrackerService/Actors/IO/CrawlMaster.cs b/src/WebCrawler.TrackerService/Actors/IO/CrawlMaster.cs index e4de58f..d001194 100644 --- a/src/WebCrawler.TrackerService/Actors/IO/CrawlMaster.cs +++ b/src/WebCrawler.TrackerService/Actors/IO/CrawlMaster.cs @@ -1,4 +1,10 @@ -using System; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System; using System.Collections.Generic; using System.Linq; using Akka.Actor; @@ -12,59 +18,24 @@ namespace WebCrawler.TrackerService.Actors.IO { /// - /// Actor responsible for individual + /// Actor responsible for individual /// public class CrawlMaster : ReceiveActor, IWithUnboundedStash { - - #region Messages - - public class CrawlCanStart - { - public CrawlCanStart(IStartJobV1 job, int nodeCount) - { - Job = job; - NodeCount = nodeCount; - } - - public IStartJobV1 Job { get; private set; } - public int NodeCount { get; private set; } - } - - public class AttemptToStartJob - { - public AttemptToStartJob(IStartJobV1 job) - { - Job = job; - } - - public IStartJobV1 Job { get; private set; } - } - - #endregion - public const string CoordinatorRouterName = "coordinators"; protected readonly CrawlJob Job; - /// - /// All of the actors subscribed to updates for - /// - protected HashSet Subscribers = new HashSet(); - - protected JobStatusUpdate RunningStatus; - - protected CrawlJobStats TotalStats - { - get { return RunningStatus.Stats; } - set { RunningStatus = RunningStatus.WithStats(value); } - } - protected IActorRef CoordinatorRouter; protected IActorRef DownloadTracker; protected ICancelable JobStarter; protected ILoggingAdapter Log = Context.GetLogger(); - public IStash Stash { get; set; } + protected JobStatusUpdate RunningStatus; + + /// + /// All of the actors subscribed to updates for + /// + protected HashSet Subscribers = new HashSet(); public CrawlMaster(CrawlJob job) { @@ -75,6 +46,14 @@ public CrawlMaster(CrawlJob job) WaitingForTracker(); } + protected CrawlJobStats TotalStats + { + get => RunningStatus.Stats; + set => RunningStatus = RunningStatus.WithStats(value); + } + + public IStash Stash { get; set; } + protected override void PreStart() { /* Request a download tracker instance from the downloads master */ @@ -107,19 +86,16 @@ private void WaitingForTracker() // stash everything else until we have a tracker ReceiveAny(o => Stash.Stash()); } + private void BecomeReady() { if (Context.Child(CoordinatorRouterName).Equals(ActorRefs.Nobody)) - { CoordinatorRouter = Context.ActorOf( Props.Create(() => new DownloadCoordinator(Job, Self, DownloadTracker, 50)) .WithRouter(FromConfig.Instance), CoordinatorRouterName); - } else //in the event of a restart - { CoordinatorRouter = Context.Child(CoordinatorRouterName); - } Become(Ready); Stash.UnstashAll(); Context.SetReceiveTimeout(TimeSpan.FromSeconds(120)); @@ -158,14 +134,10 @@ private void Ready() Stash.UnstashAll(); }); - Receive(start => - { - Log.Info("Can't start job yet. No routees."); - }); + Receive(start => { Log.Info("Can't start job yet. No routees."); }); ReceiveAny(o => Stash.Stash()); - } private void Started() @@ -214,5 +186,31 @@ private void PublishJobStatus() foreach (var sub in Subscribers) sub.Tell(RunningStatus); } + + #region Messages + + public class CrawlCanStart + { + public CrawlCanStart(IStartJobV1 job, int nodeCount) + { + Job = job; + NodeCount = nodeCount; + } + + public IStartJobV1 Job { get; } + public int NodeCount { get; } + } + + public class AttemptToStartJob + { + public AttemptToStartJob(IStartJobV1 job) + { + Job = job; + } + + public IStartJobV1 Job { get; } + } + + #endregion } -} +} \ No newline at end of file diff --git a/src/WebCrawler.TrackerService/Actors/Tracking/DownloadsMaster.cs b/src/WebCrawler.TrackerService/Actors/Tracking/DownloadsMaster.cs index b988c37..df1cf33 100644 --- a/src/WebCrawler.TrackerService/Actors/Tracking/DownloadsMaster.cs +++ b/src/WebCrawler.TrackerService/Actors/Tracking/DownloadsMaster.cs @@ -1,6 +1,6 @@ // ----------------------------------------------------------------------- // -// Copyright (C) 2018 - 2018 Petabridge, LLC +// Copyright (C) 2015 - 2019 Petabridge, LLC // // ----------------------------------------------------------------------- diff --git a/src/WebCrawler.TrackerService/Actors/Tracking/DownloadsTracker.cs b/src/WebCrawler.TrackerService/Actors/Tracking/DownloadsTracker.cs index 822b4c3..d0690ff 100644 --- a/src/WebCrawler.TrackerService/Actors/Tracking/DownloadsTracker.cs +++ b/src/WebCrawler.TrackerService/Actors/Tracking/DownloadsTracker.cs @@ -1,4 +1,10 @@ -using System; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System; using System.Collections.Generic; using Akka.Actor; using WebCrawler.Shared.IO.Messages; @@ -8,14 +14,16 @@ namespace WebCrawler.TrackerService.Actors.Tracking { /// - /// Actor responsible for documenting the persistence of documents + /// Actor responsible for documenting the persistence of documents /// public class DownloadsTracker : ReceiveActor { - private readonly Dictionary _recordedDocuments; private readonly TimeSpan _defaultCrawlTime; + private readonly Dictionary _recordedDocuments; - public DownloadsTracker() : this(new Dictionary(), TimeSpan.FromSeconds(30)) { } + public DownloadsTracker() : this(new Dictionary(), TimeSpan.FromSeconds(30)) + { + } public DownloadsTracker(Dictionary recordedDocuments, TimeSpan defaultCrawlTime) { @@ -32,20 +40,20 @@ private void InitialReceives() var availableDocs = new List(); var discoveredDocs = new List(); foreach (var doc in check.Documents) - { //first time we've seen this doc if (!_recordedDocuments.ContainsKey(doc)) { - _recordedDocuments[doc] = CrawlStatus.StartCrawl(check.Requestor, check.EstimatedCrawlTime ?? _defaultCrawlTime); + _recordedDocuments[doc] = CrawlStatus.StartCrawl(check.Requestor, + check.EstimatedCrawlTime ?? _defaultCrawlTime); availableDocs.Add(doc); discoveredDocs.Add(doc); } - else if(_recordedDocuments[doc].TryClaim(check.Requestor, check.EstimatedCrawlTime ?? _defaultCrawlTime)) + else if (_recordedDocuments[doc] + .TryClaim(check.Requestor, check.EstimatedCrawlTime ?? _defaultCrawlTime)) { //TODO: add status message about new actor taking over processing here availableDocs.Add(doc); } - } Sender.Tell(new ProcessDocuments(availableDocs, check.Requestor)); Sender.Tell(new DiscoveredDocuments(discoveredDocs, check.Requestor)); @@ -59,4 +67,4 @@ private void InitialReceives() }); } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.TrackerService/Program.cs b/src/WebCrawler.TrackerService/Program.cs index da27fc1..bebdc53 100644 --- a/src/WebCrawler.TrackerService/Program.cs +++ b/src/WebCrawler.TrackerService/Program.cs @@ -1,6 +1,6 @@ // ----------------------------------------------------------------------- // -// Copyright (C) 2018 - 2018 Petabridge, LLC +// Copyright (C) 2015 - 2019 Petabridge, LLC // // ----------------------------------------------------------------------- diff --git a/src/WebCrawler.TrackerService/State/CrawlStatus.cs b/src/WebCrawler.TrackerService/State/CrawlStatus.cs index 062bab2..77b21f6 100644 --- a/src/WebCrawler.TrackerService/State/CrawlStatus.cs +++ b/src/WebCrawler.TrackerService/State/CrawlStatus.cs @@ -1,14 +1,19 @@ -using System; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System; using Akka.Actor; using WebCrawler.Shared.Util; namespace WebCrawler.TrackerService.State { /// - /// The status of a particular operation. - /// - /// If the crawl operatoin isn't completed before the elapsed time, another actor can start - /// the process. + /// The status of a particular operation. + /// If the crawl operatoin isn't completed before the elapsed time, another actor can start + /// the process. /// public class CrawlStatus { @@ -16,10 +21,7 @@ public class CrawlStatus public Deadline Timeout { get; private set; } - public bool CanProcess - { - get { return !IsComplete && (Timeout == null || Timeout.IsOverdue); } - } + public bool CanProcess => !IsComplete && (Timeout == null || Timeout.IsOverdue); public IActorRef Owner { get; private set; } @@ -49,4 +51,4 @@ public bool TryClaim(IActorRef newOwner, TimeSpan crawlTime) return false; } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.TrackerService/TrackerService.cs b/src/WebCrawler.TrackerService/TrackerService.cs index 0e0bd7c..7d6993e 100644 --- a/src/WebCrawler.TrackerService/TrackerService.cs +++ b/src/WebCrawler.TrackerService/TrackerService.cs @@ -1,16 +1,15 @@ // ----------------------------------------------------------------------- // -// Copyright (C) 2018 - 2018 Petabridge, LLC +// Copyright (C) 2015 - 2019 Petabridge, LLC // // ----------------------------------------------------------------------- using System.Threading.Tasks; using Akka.Actor; -using Akka.Bootstrap.Docker; using WebCrawler.Shared.Config; +using WebCrawler.Shared.DevOps; using WebCrawler.TrackerService.Actors; using WebCrawler.TrackerService.Actors.Tracking; -using WebCrawler.Shared.DevOps; namespace WebCrawler.TrackerService { diff --git a/src/WebCrawler.Web/Actors/CommandProcessor.cs b/src/WebCrawler.Web/Actors/CommandProcessor.cs index 3b14010..eeb9333 100644 --- a/src/WebCrawler.Web/Actors/CommandProcessor.cs +++ b/src/WebCrawler.Web/Actors/CommandProcessor.cs @@ -1,4 +1,10 @@ -using System; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System; using System.Linq; using Akka.Actor; using Akka.Routing; @@ -8,37 +14,10 @@ namespace WebCrawler.Web.Actors { /// - /// Actor responsible for processing commands + /// Actor responsible for processing commands /// public class CommandProcessor : ReceiveActor { - #region Messages - - public class AttemptCrawl - { - public AttemptCrawl(string rawStr) - { - RawStr = rawStr; - } - - public string RawStr { get; private set; } - } - - public class BadCrawlAttempt - { - public BadCrawlAttempt(string rawStr, string message) - { - Message = message; - RawStr = rawStr; - } - - public string RawStr { get; private set; } - - public string Message { get; private set; } - } - - #endregion - protected readonly IActorRef CommandRouter; public CommandProcessor(IActorRef commandRouter) @@ -74,5 +53,32 @@ private void Receives() } }); } + + #region Messages + + public class AttemptCrawl + { + public AttemptCrawl(string rawStr) + { + RawStr = rawStr; + } + + public string RawStr { get; } + } + + public class BadCrawlAttempt + { + public BadCrawlAttempt(string rawStr, string message) + { + Message = message; + RawStr = rawStr; + } + + public string RawStr { get; } + + public string Message { get; } + } + + #endregion } } \ No newline at end of file diff --git a/src/WebCrawler.Web/Actors/SignalRActor.cs b/src/WebCrawler.Web/Actors/SignalRActor.cs index 46a001d..7abd1be 100644 --- a/src/WebCrawler.Web/Actors/SignalRActor.cs +++ b/src/WebCrawler.Web/Actors/SignalRActor.cs @@ -1,38 +1,20 @@ -using Akka.Actor; -using Microsoft.AspNetCore.SignalR; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using Akka.Actor; using WebCrawler.Shared.Commands.V1; using WebCrawler.Web.Hubs; namespace WebCrawler.Web.Actors { /// - /// Actor used to wrap a signalr hub + /// Actor used to wrap a signalr hub /// public class SignalRActor : ReceiveActor, IWithUnboundedStash { - #region Messages - - public class DebugCluster - { - public DebugCluster(string message) - { - Message = message; - } - - public string Message { get; private set; } - } - - public class SetHub : INoSerializationVerificationNeeded - { - public SetHub(CrawlHubHelper hub) - { - Hub = hub; - } - public CrawlHubHelper Hub { get; } - } - - #endregion - private readonly IActorRef _commandProcessor; private CrawlHubHelper _hub; @@ -44,6 +26,9 @@ public SignalRActor(IActorRef commandProcessor) WaitingForHub(); } + + public IStash Stash { get; set; } + private void HubAvailable() { Receive(str => { _commandProcessor.Tell(new CommandProcessor.AttemptCrawl(str)); }); @@ -75,7 +60,28 @@ private void WaitingForHub() ReceiveAny(_ => Stash.Stash()); } + #region Messages - public IStash Stash { get; set; } + public class DebugCluster + { + public DebugCluster(string message) + { + Message = message; + } + + public string Message { get; } + } + + public class SetHub : INoSerializationVerificationNeeded + { + public SetHub(CrawlHubHelper hub) + { + Hub = hub; + } + + public CrawlHubHelper Hub { get; } + } + + #endregion } } \ No newline at end of file diff --git a/src/WebCrawler.Web/Actors/SystemActors.cs b/src/WebCrawler.Web/Actors/SystemActors.cs index 0a95d80..6f32770 100644 --- a/src/WebCrawler.Web/Actors/SystemActors.cs +++ b/src/WebCrawler.Web/Actors/SystemActors.cs @@ -1,11 +1,16 @@ -using Akka.Actor; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using Akka.Actor; namespace WebCrawler.Web.Actors { /// - /// Static class used to work around weird SignalR constructors - /// - /// (need to learn how to wire this up properly in signalr) + /// Static class used to work around weird SignalR constructors + /// (need to learn how to wire this up properly in signalr) /// public static class SystemActors { diff --git a/src/WebCrawler.Web/AkkaStartupTasks.cs b/src/WebCrawler.Web/AkkaStartupTasks.cs index 003803a..68647c3 100644 --- a/src/WebCrawler.Web/AkkaStartupTasks.cs +++ b/src/WebCrawler.Web/AkkaStartupTasks.cs @@ -1,5 +1,10 @@ +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + using Akka.Actor; -using Akka.Bootstrap.Docker; using Akka.Routing; using WebCrawler.Shared.Config; using WebCrawler.Shared.DevOps; @@ -14,9 +19,11 @@ public static ActorSystem StartAkka() var config = HoconLoader.ParseConfig("web.hocon"); SystemActors.ActorSystem = ActorSystem.Create("webcrawler", config.ApplyOpsConfig()).StartPbm(); var router = SystemActors.ActorSystem.ActorOf(Props.Empty.WithRouter(FromConfig.Instance), "tasker"); - var processor = SystemActors.CommandProcessor = SystemActors.ActorSystem.ActorOf(Props.Create(() => new CommandProcessor(router)), + var processor = SystemActors.CommandProcessor = SystemActors.ActorSystem.ActorOf( + Props.Create(() => new CommandProcessor(router)), "commands"); - SystemActors.SignalRActor = SystemActors.ActorSystem.ActorOf(Props.Create(() => new SignalRActor(processor)), "signalr"); + SystemActors.SignalRActor = + SystemActors.ActorSystem.ActorOf(Props.Create(() => new SignalRActor(processor)), "signalr"); return SystemActors.ActorSystem; } } diff --git a/src/WebCrawler.Web/Controllers/HomeController.cs b/src/WebCrawler.Web/Controllers/HomeController.cs index 7590663..c889b0e 100644 --- a/src/WebCrawler.Web/Controllers/HomeController.cs +++ b/src/WebCrawler.Web/Controllers/HomeController.cs @@ -1,7 +1,9 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Threading.Tasks; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + using Microsoft.AspNetCore.Mvc; namespace WebCrawler.Web.Controllers diff --git a/src/WebCrawler.Web/Hubs/CrawlHub.cs b/src/WebCrawler.Web/Hubs/CrawlHub.cs index 1690cf5..0042615 100644 --- a/src/WebCrawler.Web/Hubs/CrawlHub.cs +++ b/src/WebCrawler.Web/Hubs/CrawlHub.cs @@ -1,5 +1,9 @@ -using System.Collections.Generic; -using System.Linq; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + using Akka.Actor; using Microsoft.AspNetCore.SignalR; using WebCrawler.Web.Actors; @@ -13,4 +17,4 @@ public void StartCrawl(string message) SystemActors.SignalRActor.Tell(message, ActorRefs.Nobody); } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Web/Hubs/CrawlHubHelper.cs b/src/WebCrawler.Web/Hubs/CrawlHubHelper.cs index 3a7cae8..ca7f878 100644 --- a/src/WebCrawler.Web/Hubs/CrawlHubHelper.cs +++ b/src/WebCrawler.Web/Hubs/CrawlHubHelper.cs @@ -1,4 +1,10 @@ -using System; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System; using System.Threading; using System.Threading.Tasks; using Akka.Actor; @@ -11,7 +17,7 @@ namespace WebCrawler.Web.Hubs { /// /// - /// Necessary for getting access to a hub and passing it along to our actors + /// Necessary for getting access to a hub and passing it along to our actors /// public class CrawlHubHelper : IHostedService { @@ -22,6 +28,18 @@ public CrawlHubHelper(IHubContext hub) _hub = hub; } + public Task StartAsync(CancellationToken cancellationToken) + { + AkkaStartupTasks.StartAkka(); + SystemActors.SignalRActor.Tell(new SignalRActor.SetHub(this)); + return Task.CompletedTask; + } + + public Task StopAsync(CancellationToken cancellationToken) + { + return Task.CompletedTask; + } + public void PushStatus(IStatusUpdateV1 update) { WriteMessage( @@ -42,17 +60,5 @@ internal void WriteMessage(string message) { _hub.Clients.All.SendAsync("writeStatus", message); } - - public Task StartAsync(CancellationToken cancellationToken) - { - AkkaStartupTasks.StartAkka(); - SystemActors.SignalRActor.Tell(new SignalRActor.SetHub(this)); - return Task.CompletedTask; - } - - public Task StopAsync(CancellationToken cancellationToken) - { - return Task.CompletedTask; - } } } \ No newline at end of file diff --git a/src/WebCrawler.Web/Program.cs b/src/WebCrawler.Web/Program.cs index ffb4818..41820e6 100644 --- a/src/WebCrawler.Web/Program.cs +++ b/src/WebCrawler.Web/Program.cs @@ -1,13 +1,13 @@ -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Threading.Tasks; +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + +using System; using Akka.Actor; using Microsoft.AspNetCore; using Microsoft.AspNetCore.Hosting; -using Microsoft.Extensions.Configuration; -using Microsoft.Extensions.Logging; using WebCrawler.Web.Actors; namespace WebCrawler.Web @@ -20,20 +20,23 @@ public static void Main(string[] args) Console.CancelKeyPress += async (sender, eventArgs) => { - var wait = CoordinatedShutdown.Get(SystemActors.ActorSystem).Run(CoordinatedShutdown.ClrExitReason.Instance); + var wait = CoordinatedShutdown.Get(SystemActors.ActorSystem) + .Run(CoordinatedShutdown.ClrExitReason.Instance); await host.StopAsync(TimeSpan.FromSeconds(10)); await wait; }; - + host.Run(); SystemActors.ActorSystem?.WhenTerminated.Wait(); } - public static IWebHost BuildWebHost(string[] args) => - WebHost.CreateDefaultBuilder(args) + public static IWebHost BuildWebHost(string[] args) + { + return WebHost.CreateDefaultBuilder(args) .UseKestrel() .UseStartup() .Build(); + } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Web/Startup.cs b/src/WebCrawler.Web/Startup.cs index 0908a5b..c977ff4 100644 --- a/src/WebCrawler.Web/Startup.cs +++ b/src/WebCrawler.Web/Startup.cs @@ -1,13 +1,13 @@ +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; using System.Threading; -using System.Threading.Tasks; using Microsoft.AspNetCore.Builder; -using Microsoft.AspNetCore.Http; using Microsoft.AspNetCore.Hosting; -using Microsoft.AspNetCore.SignalR; using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; using WebCrawler.Web.Hubs; @@ -16,13 +16,13 @@ namespace WebCrawler.Web { public class Startup { - public static IServiceProvider Provider { get; private set; } - public Startup(IConfiguration configuration) { Configuration = configuration; } + public static IServiceProvider Provider { get; private set; } + public IConfiguration Configuration { get; } public void ConfigureServices(IServiceCollection services) @@ -33,7 +33,7 @@ public void ConfigureServices(IServiceCollection services) } // This method gets called by the runtime. Use this method to configure the HTTP request pipeline. - public void Configure(IApplicationBuilder app, IHostingEnvironment env) + public void Configure(IApplicationBuilder app, IHostingEnvironment env) { if (env.IsDevelopment()) { @@ -50,15 +50,12 @@ public void Configure(IApplicationBuilder app, IHostingEnvironment env) app.UseMvc(routes => { routes.MapRoute( - name: "default", - template: "{controller=Home}/{action=Index}/{id?}"); - }); - app.UseSignalR(builder => - { - builder.MapHub("/hubs/crawlHub"); + "default", + "{controller=Home}/{action=Index}/{id?}"); }); + app.UseSignalR(builder => { builder.MapHub("/hubs/crawlHub"); }); app.ApplicationServices.GetService().StartAsync(CancellationToken.None); //start Akka.NET } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Web/Views/Home/Index.cshtml b/src/WebCrawler.Web/Views/Home/Index.cshtml index bdde615..2cad8f5 100644 --- a/src/WebCrawler.Web/Views/Home/Index.cshtml +++ b/src/WebCrawler.Web/Views/Home/Index.cshtml @@ -6,7 +6,9 @@

Web Crawler of Doom

Akka.NET on Gorilla Steroids - Version (@ViewBag.AppVersion)

-

Learn more »

+

+ Learn more » +

@@ -49,9 +51,10 @@ // Reference the auto-generated proxy for the hub. var chat = new signalR.HubConnectionBuilder().withUrl("/hubs/crawlHub").build(); // Create a function that the hub can call back to display messages. - chat.on("writeStatus", function(message) { - stream.addMessage(message); - }); + chat.on("writeStatus", + function(message) { + stream.addMessage(message); + }); chat.start().then(function() { diff --git a/src/WebCrawler.Web/Views/Shared/Error.cshtml b/src/WebCrawler.Web/Views/Shared/Error.cshtml index 6ffa1be..e4f8459 100644 --- a/src/WebCrawler.Web/Views/Shared/Error.cshtml +++ b/src/WebCrawler.Web/Views/Shared/Error.cshtml @@ -1,5 +1,4 @@ @page -@using Views.Shared @model Views.Shared.ErrorModel @{ ViewData["Title"] = "Error"; @@ -21,4 +20,4 @@

Development environment should not be enabled in deployed applications, as it can result in sensitive information from exceptions being displayed to end users. For local debugging, development environment can be enabled by setting the ASPNETCORE_ENVIRONMENT environment variable to Development, and restarting the application. -

+

\ No newline at end of file diff --git a/src/WebCrawler.Web/Views/Shared/Error.cshtml.cs b/src/WebCrawler.Web/Views/Shared/Error.cshtml.cs index 8efbfe1..05984c1 100644 --- a/src/WebCrawler.Web/Views/Shared/Error.cshtml.cs +++ b/src/WebCrawler.Web/Views/Shared/Error.cshtml.cs @@ -1,3 +1,9 @@ +// ----------------------------------------------------------------------- +// +// Copyright (C) 2015 - 2019 Petabridge, LLC +// +// ----------------------------------------------------------------------- + using System.Diagnostics; using Microsoft.AspNetCore.Mvc.RazorPages; @@ -14,4 +20,4 @@ public void OnGet() RequestId = Activity.Current?.Id ?? HttpContext.TraceIdentifier; } } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Web/Views/Shared/_Layout.cshtml b/src/WebCrawler.Web/Views/Shared/_Layout.cshtml index 86f197b..8ff56b0 100644 --- a/src/WebCrawler.Web/Views/Shared/_Layout.cshtml +++ b/src/WebCrawler.Web/Views/Shared/_Layout.cshtml @@ -1,71 +1,77 @@  - - + + @ViewData["Title"] - WebCrawler.Web - - + + - + asp-fallback-test-class="sr-only" asp-fallback-test-property="position" asp-fallback-test-value="absolute"/> + - +
+ @RenderBody() +
+
+

© 2017 - WebCrawler.Web

+
+
- - - - - - - - - - + + + + + + + + + + - @RenderSection("Scripts", required: false) +@RenderSection("Scripts", false) - + \ No newline at end of file diff --git a/src/WebCrawler.Web/Views/Shared/_ValidationScriptsPartial.cshtml b/src/WebCrawler.Web/Views/Shared/_ValidationScriptsPartial.cshtml index a2b13b3..bbd0c1b 100644 --- a/src/WebCrawler.Web/Views/Shared/_ValidationScriptsPartial.cshtml +++ b/src/WebCrawler.Web/Views/Shared/_ValidationScriptsPartial.cshtml @@ -15,4 +15,4 @@ crossorigin="anonymous" integrity="sha384-JrXK+k53HACyavUKOsL+NkmSesD2P+73eDMrbTtTk0h4RmOF8hF8apPlkp26JlyH"> - + \ No newline at end of file diff --git a/src/WebCrawler.Web/Views/Shared/_ViewImports.cshtml b/src/WebCrawler.Web/Views/Shared/_ViewImports.cshtml index e3a0dc2..d1aca73 100644 --- a/src/WebCrawler.Web/Views/Shared/_ViewImports.cshtml +++ b/src/WebCrawler.Web/Views/Shared/_ViewImports.cshtml @@ -1,4 +1,4 @@ @using Microsoft.AspNetCore.Identity -@using WebCrawler.Web +@using Web @namespace WebCrawler.Web.Pages -@addTagHelper *, Microsoft.AspNetCore.Mvc.TagHelpers +@addTagHelper *, Microsoft.AspNetCore.Mvc.TagHelpers \ No newline at end of file diff --git a/src/WebCrawler.Web/Views/Shared/_ViewStart.cshtml b/src/WebCrawler.Web/Views/Shared/_ViewStart.cshtml index a5f1004..1af6e49 100644 --- a/src/WebCrawler.Web/Views/Shared/_ViewStart.cshtml +++ b/src/WebCrawler.Web/Views/Shared/_ViewStart.cshtml @@ -1,3 +1,3 @@ @{ Layout = "_Layout"; -} +} \ No newline at end of file diff --git a/src/WebCrawler.Web/appsettings.Development.json b/src/WebCrawler.Web/appsettings.Development.json index fa8ce71..c7038dd 100644 --- a/src/WebCrawler.Web/appsettings.Development.json +++ b/src/WebCrawler.Web/appsettings.Development.json @@ -1,10 +1,10 @@ { - "Logging": { - "IncludeScopes": false, - "LogLevel": { - "Default": "Debug", - "System": "Information", - "Microsoft": "Information" + "Logging": { + "IncludeScopes": false, + "LogLevel": { + "Default": "Debug", + "System": "Information", + "Microsoft": "Information" + } } - } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Web/appsettings.json b/src/WebCrawler.Web/appsettings.json index f5365c8..df98fdc 100644 --- a/src/WebCrawler.Web/appsettings.json +++ b/src/WebCrawler.Web/appsettings.json @@ -1,11 +1,11 @@ { - "ConnectionStrings": { - "DefaultConnection": "DataSource=app.db" - }, - "Logging": { - "IncludeScopes": false, - "LogLevel": { - "Default": "Warning" + "ConnectionStrings": { + "DefaultConnection": "DataSource=app.db" + }, + "Logging": { + "IncludeScopes": false, + "LogLevel": { + "Default": "Warning" + } } - } -} +} \ No newline at end of file diff --git a/src/WebCrawler.Web/bundleconfig.json b/src/WebCrawler.Web/bundleconfig.json index 6d3f9a5..be7d314 100644 --- a/src/WebCrawler.Web/bundleconfig.json +++ b/src/WebCrawler.Web/bundleconfig.json @@ -1,24 +1,24 @@ // Configure bundling and minification for the project. // More info at https://go.microsoft.com/fwlink/?LinkId=808241 [ - { - "outputFileName": "wwwroot/css/site.min.css", - // An array of relative input file paths. Globbing patterns supported - "inputFiles": [ - "wwwroot/css/site.css" - ] - }, - { - "outputFileName": "wwwroot/js/site.min.js", - "inputFiles": [ - "wwwroot/js/site.js" - ], - // Optionally specify minification options - "minify": { - "enabled": true, - "renameLocals": true + { + "outputFileName": "wwwroot/css/site.min.css", + // An array of relative input file paths. Globbing patterns supported + "inputFiles": [ + "wwwroot/css/site.css" + ] }, - // Optionally generate .map file - "sourceMap": false - } -] + { + "outputFileName": "wwwroot/js/site.min.js", + "inputFiles": [ + "wwwroot/js/site.js" + ], + // Optionally specify minification options + "minify": { + "enabled": true, + "renameLocals": true + }, + // Optionally generate .map file + "sourceMap": false + } +] \ No newline at end of file diff --git a/src/WebCrawler.Web/package.json b/src/WebCrawler.Web/package.json index e2226f3..db0d958 100644 --- a/src/WebCrawler.Web/package.json +++ b/src/WebCrawler.Web/package.json @@ -1,12 +1,12 @@ { - "name": "WebCrawler.Web", - "version": "1.0.0", - "description": "", - "main": "index.js", - "scripts": { - "test": "echo \"Error: no test specified\" && exit 1" - }, - "keywords": [], - "author": "", - "license": "ISC" -} + "name": "WebCrawler.Web", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "keywords": [], + "author": "", + "license": "ISC" +} \ No newline at end of file From 0331c1cde56de3fd47d12df829c7e448255e6db2 Mon Sep 17 00:00:00 2001 From: Aaron Stannard Date: Sun, 14 Apr 2019 17:19:15 -0500 Subject: [PATCH 7/8] increased the throttle rate by factor of 5 (#31) --- src/WebCrawler.Shared.IO/DownloadCoordinator.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/WebCrawler.Shared.IO/DownloadCoordinator.cs b/src/WebCrawler.Shared.IO/DownloadCoordinator.cs index 6d77f1b..32b756f 100644 --- a/src/WebCrawler.Shared.IO/DownloadCoordinator.cs +++ b/src/WebCrawler.Shared.IO/DownloadCoordinator.cs @@ -50,7 +50,7 @@ public DownloadCoordinator(CrawlJob job, IActorRef commander, IActorRef download var selfDocSink = Sink.ActorRef(Self, StreamCompleteTick.Instance); var selfImgSink = Sink.ActorRef(Self, StreamCompleteTick.Instance); var htmlFlow = Flow.Create().Via(DownloadFlow.SelectDocType()) - .Throttle(30, TimeSpan.FromSeconds(1), 100, ThrottleMode.Shaping) + .Throttle(30, TimeSpan.FromSeconds(5), 100, ThrottleMode.Shaping) .Via(DownloadFlow.ProcessHtmlDownloadFor(DefaultMaxConcurrentDownloads, HttpClientFactory.GetClient())); var imageFlow = Flow.Create() From f51ecec417b6f70206507b208e7c4363738fdaaa Mon Sep 17 00:00:00 2001 From: Aaron Stannard Date: Sun, 14 Apr 2019 17:35:32 -0500 Subject: [PATCH 8/8] added v0.2.6 release notes (#32) --- RELEASE_NOTES.md | 5 ++--- src/common.props | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 3bb58f5..4ec0ed8 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,3 +1,2 @@ -#### 0.2.5 April 14 2019 #### -* Upgraded to Akka.Cluster v1.3.12 -* Upgraded to [Akka.HealthCheck.Cluster v0.2.1](https://github.com/petabridge/akkadotnet-healthcheck/releases/tag/0.2.1) \ No newline at end of file +#### 0.2.6 April 14 2019 #### +* Lengthened throttling window on `DownloadCoordinator` \ No newline at end of file diff --git a/src/common.props b/src/common.props index fc11efd..1e12572 100644 --- a/src/common.props +++ b/src/common.props @@ -2,9 +2,8 @@ Copyright © 2015-2019 Petabridge, LLC Petabridge - 0.2.5 - Upgraded to Akka.Cluster v1.3.12 -Upgraded to [Akka.HealthCheck.Cluster v0.2.1](https://github.com/petabridge/akkadotnet-healthcheck/releases/tag/0.2.1) + 0.2.6 + Lengthened throttling window on `DownloadCoordinator`