Skip to content

Commit

Permalink
refactor: Remove ReacherConfig, use CheckEmailInput (#1538)
Browse files Browse the repository at this point in the history
* feat: Remove ReacherConfig, use CheckEmailInput

* Fixed

* Fix request

* fix build

* fix docs

* test

* remove test
  • Loading branch information
amaury1093 authored Nov 25, 2024
1 parent f2e6c2f commit f75e0c4
Show file tree
Hide file tree
Showing 21 changed files with 183 additions and 230 deletions.
1 change: 1 addition & 0 deletions .github/workflows/deploy_cli.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ jobs:

windows:
runs-on: windows-latest
if: false # Disable Windows tests for now, because Reacher Worker is not supported on Windows
needs: install-cross
steps:
- uses: actions/checkout@v2
Expand Down
5 changes: 4 additions & 1 deletion backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ RUN chown chrome:chrome docker.sh
USER chrome

ENV RUST_LOG=reacher=info
ENV RCH_HTTP_HOST=0.0.0.0
ENV RCH__HTTP_HOST=0.0.0.0
# Currently this Dockerfile is mainly used for single-shot verifications, so we
# disable the worker by default.
ENV RCH__WORKER__ENABLED=false

EXPOSE 8080

Expand Down
46 changes: 26 additions & 20 deletions backend/backend_config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,17 @@ from_email = "[email protected]"
# Address of the Chrome WebDriver server for headless email verifications.
webdriver_addr = "http://localhost:9515"

# Timeout for each SMTP connection, in seconds. Leaving it commented out will
# not set a timeout, i.e. the connection will wait indefinitely.
# smtp_timeout = 45

# Uncomment the following lines to route all SMTP verification requests through
# a specified proxy. Note that the proxy must be a SOCKS5 proxy to work with
# the SMTP protocol. This proxy will not be used for headless verifications.
#
# The username and password are optional and only needed if the proxy requires
# authentication.
#
# [proxy]
# host = "my.proxy.com"
# port = 1080
Expand All @@ -52,22 +57,9 @@ hotmailb2c = "headless"
yahoo = "headless"

[worker]
enable = false

# Fields below are only used if the worker is enabled.

# Throttle the maximum number of requests per second, per minute, per hour, and
# per day for this worker.
# All fields are optional; comment them out to disable the limit.
#
# Important: these throttle configurations only apply to bulk verification and
# not to the single /v0/check_email endpoint. The latter endpoint always
# executes the verification immediately, regardless of the throttle settings.
[worker.throttle]
# max_requests_per_second = 20
# max_requests_per_minute = 100
# max_requests_per_hour = 1000
# max_requests_per_day = 20000
# Enable the worker to consume emails from the RabbitMQ queues. If set, the
# RabbitMQ configuration below must be set as well.
enable = true

# RabbitMQ configuration.
[worker.rabbitmq]
Expand All @@ -92,13 +84,27 @@ queues = "all"
# Number of concurrent emails to verify for this worker across all queues.
concurrency = 20

# Throttle the maximum number of requests per second, per minute, per hour, and
# per day for this worker.
# All fields are optional; comment them out to disable the limit.
#
# Important: these throttle configurations only apply to /v1/* endpoints, and
# not to the previous /v0/check_email endpoint. The latter endpoint always
# executes the verification immediately, regardless of the throttle settings.
[worker.throttle]
# max_requests_per_second = 20
# max_requests_per_minute = 100
# max_requests_per_hour = 1000
# max_requests_per_day = 20000

# Postgres configuration. Currently, a Postgres database is required to store
# the results of the verifications. This might change in the future, allowing
# for pluggable storage.
[worker.postgres]
db_url = "postgresql://localhost/reacherdb"

# Optional webhook URL to send the results to. This will send one POST request
# per email verification, with the result in the body.
# [worker.webhook.on_each_email]
# url = "http://localhost:8080/webhook"
# Optional Sentry configuration. If set, all errors will be sent to Sentry.
# [sentry]
# dsn = "<PASTE_YOUR_DSN_NOW>"
# Identifier sent to Sentry, usually the same as the top-level backend_name.
# backend_name = "backend-dev"
30 changes: 15 additions & 15 deletions backend/scripts/debian11.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,27 @@ set -e

# You can change the default values of these variables inline here, or by
# setting them in the environment before running this script, e.g.:
# RCH_BACKEND_NAME="my-own-name" ./debian11.sh
# RCH__BACKEND_NAME="my-own-name" ./debian11.sh

# A unique identifier for the backend.
RCH_BACKEND_NAME=${RCH_BACKEND_NAME:-"backend1.mycompany.com"}
RCH__BACKEND_NAME=${RCH__BACKEND_NAME:-"backend1.mycompany.com"}
# Docker Hub tag for reacherhq/backend.
RCH_VERSION=${RCH_VERSION:-"v0.7.0"}
RCH_VERSION=${RCH_VERSION:-"v0.10.0-beta.1"}
# Optional: Send bug reports to a Sentry.io dashboard.
RCH_SENTRY_DSN=${RCH_SENTRY_DSN:-}
RCH__SENTRY_DSN=${RCH__SENTRY_DSN:-}
# Protect the backend from the public via a `x-reacher-secret` header.
RCH_HEADER_SECRET=${RCH_HEADER_SECRET:-}
RCH__HEADER_SECRET=${RCH__HEADER_SECRET:-}
# For the "FROM" field in emails.
RCH_FROM_EMAIL=${RCH_FROM_EMAIL:-"[email protected]"}
RCH__FROM_EMAIL=${RCH__FROM_EMAIL:-"[email protected]"}
# For the "EHLO" field in emails. This should ideally match the server's
# reverse DNS entry for optimal results.
RCH_HELLO_NAME=${RCH_HELLO_NAME:-"backend1.mycompany.com"}
RCH__HELLO_NAME=${RCH__HELLO_NAME:-"backend1.mycompany.com"}
# Timeout for SMTP connections in seconds.
RCH_SMTP_TIMEOUT=${RCH_SMTP_TIMEOUT:-"90"}
RCH__SMTP_TIMEOUT=${RCH__SMTP_TIMEOUT:-"90"}
# Logging. Set to "debug" to show all logs.
RUST_LOG=${RUST_LOG:-"info"}

echo "Installing Reacher backend $RCH_VERSION on host $RCH_BACKEND_NAME..."
echo "Installing Reacher backend $RCH_VERSION on host $RCH__BACKEND_NAME..."

# Install Docker
# https://docs.docker.com/engine/install/debian/
Expand Down Expand Up @@ -64,12 +64,12 @@ docker rm reacher_backend
# Run the backend
docker run -d \
-e RUST_LOG=$RUST_LOG \
-e RCH_BACKEND_NAME=$RCH_BACKEND_NAME \
-e RCH_SENTRY_DSN=$RCH_SENTRY_DSN \
-e RCH_HEADER_SECRET=$RCH_HEADER_SECRET \
-e RCH_FROM_EMAIL=$RCH_FROM_EMAIL \
-e RCH_HELLO_NAME=$RCH_HELLO_NAME \
-e RCH_SMTP_TIMEOUT=$RCH_SMTP_TIMEOUT \
-e RCH__BACKEND_NAME=$RCH__BACKEND_NAME \
-e RCH__SENTRY_DSN=$RCH__SENTRY_DSN \
-e RCH__HEADER_SECRET=$RCH__HEADER_SECRET \
-e RCH__FROM_EMAIL=$RCH__FROM_EMAIL \
-e RCH__HELLO_NAME=$RCH__HELLO_NAME \
-e RCH__SMTP_TIMEOUT=$RCH__SMTP_TIMEOUT \
-p 80:8080 \
--name reacher_backend \
reacherhq/backend:$RCH_VERSION
Expand Down
46 changes: 15 additions & 31 deletions backend/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,21 @@ use crate::worker::check_email::TaskWebhook;
#[cfg(feature = "worker")]
use crate::worker::setup_rabbit_mq;
use anyhow::bail;
use check_if_email_exists::config::ReacherConfig;
use check_if_email_exists::{
CheckEmailInputProxy, GmailVerifMethod, HotmailB2BVerifMethod, HotmailB2CVerifMethod,
SentryConfig, YahooVerifMethod,
YahooVerifMethod,
};
use config::Config;
#[cfg(feature = "worker")]
use lapin::Channel;
use serde::de::{self, Deserializer, Visitor};
use serde::Deserialize;
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
#[cfg(feature = "worker")]
use std::sync::Arc;
use std::{env, fmt};

#[derive(Debug, Default, Deserialize)]
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct BackendConfig {
/// Name of the backend.
pub backend_name: String,
Expand All @@ -56,13 +55,15 @@ pub struct BackendConfig {
pub http_port: u16,
/// Shared secret between a trusted client and the backend.
pub header_secret: Option<String>,
/// Timeout for each SMTP connection, in seconds. Leaving it commented out
/// will not set a timeout, i.e. the connection will wait indefinitely.
pub smtp_timeout: Option<u64>,
/// Sentry DSN to report errors to
pub sentry_dsn: Option<String>,

/// Worker configuration, only present if the backend is a worker.
pub worker: WorkerConfig,

/// Sentry configuration to report errors.
pub sentry: Option<SentryConfig>,

// Internal fields, not part of the configuration.
#[serde(skip)]
pg_pool: Option<PgPool>,
Expand All @@ -75,14 +76,6 @@ pub struct BackendConfig {
}

impl BackendConfig {
pub fn get_reacher_config(&self) -> ReacherConfig {
ReacherConfig {
backend_name: self.backend_name.clone(),
sentry: self.sentry.clone(),
webdriver_addr: self.webdriver_addr.clone(),
}
}

/// Get the worker configuration.
///
/// # Panics
Expand Down Expand Up @@ -142,7 +135,7 @@ impl BackendConfig {
}
}

#[derive(Debug, Default, Deserialize, Clone)]
#[derive(Debug, Default, Deserialize, Clone, Serialize)]
pub struct VerifMethodConfig {
/// Verification method for Gmail emails.
pub gmail: GmailVerifMethod,
Expand All @@ -154,7 +147,7 @@ pub struct VerifMethodConfig {
pub yahoo: YahooVerifMethod,
}

#[derive(Debug, Default, Deserialize, Clone)]
#[derive(Debug, Default, Deserialize, Clone, Serialize)]
pub struct WorkerConfig {
pub enable: bool,

Expand Down Expand Up @@ -186,7 +179,7 @@ pub struct MustWorkerConfig {
pub postgres: PostgresConfig,
}

#[derive(Debug, Clone)]
#[derive(Debug, Clone, Serialize)]
pub enum RabbitMQQueues {
All,
Only(Vec<Queue>),
Expand Down Expand Up @@ -251,7 +244,7 @@ impl RabbitMQQueues {
}
}

#[derive(Debug, Deserialize, Clone)]
#[derive(Debug, Deserialize, Clone, Serialize)]
pub struct RabbitMQConfig {
pub url: String,
/// Queues to consume emails from. By default the worker consumes from all
Expand All @@ -278,7 +271,7 @@ pub struct RabbitMQConfig {
/// Queue names that the worker can consume from. Each email is routed to
/// one and only one queue, based on the email provider. A single worker can
/// consume from multiple queues.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Serialize)]
pub enum Queue {
Gmail,
HotmailB2B,
Expand Down Expand Up @@ -342,12 +335,12 @@ impl<'de> Deserialize<'de> for Queue {
}
}

#[derive(Debug, Deserialize, Clone)]
#[derive(Debug, Deserialize, Clone, Serialize)]
pub struct PostgresConfig {
pub db_url: String,
}

#[derive(Debug, Deserialize, Clone)]
#[derive(Debug, Deserialize, Clone, Serialize)]
pub struct ThrottleConfig {
pub max_requests_per_second: Option<u32>,
pub max_requests_per_minute: Option<u32>,
Expand Down Expand Up @@ -416,12 +409,3 @@ pub async fn load_config() -> Result<BackendConfig, anyhow::Error> {

Ok(cfg)
}

#[cfg(test)]
mod test {
#[tokio::test]
async fn test_load_config() {
let cfg = super::load_config().await;
assert!(cfg.is_ok());
}
}
38 changes: 17 additions & 21 deletions backend/src/http/v0/bulk/task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
//! This file implements the `POST /bulk` endpoint.
use check_if_email_exists::{
check_email, config::ReacherConfig, CheckEmailInput, CheckEmailInputBuilder,
CheckEmailInputProxy, CheckEmailOutput, Reachable, SentryConfig, LOG_TARGET,
check_email, CheckEmailInput, CheckEmailInputBuilder, CheckEmailInputProxy, CheckEmailOutput,
Reachable, LOG_TARGET,
};
use serde::{Deserialize, Serialize};
use sqlx::{Pool, Postgres};
Expand Down Expand Up @@ -64,7 +64,7 @@ impl Iterator for TaskInputIterator {
fn next(&mut self) -> Option<Self::Item> {
if self.index < self.body.smtp_ports.len() {
let mut item = CheckEmailInputBuilder::default();
let mut item = item.to_email(self.body.to_email.clone());
let mut item: &mut CheckEmailInputBuilder = item.to_email(self.body.to_email.clone());

if let Some(name) = &self.body.hello_name {
item = item.hello_name(name.clone());
Expand All @@ -80,6 +80,19 @@ impl Iterator for TaskInputIterator {
item = item.proxy(Some(proxy.clone()));
}

// Currently, for the legacy and deprecated /v0/bulk endpoints, we
// don't pass in a BackendConfig to the job. Therefore, we must set the
// backend name, Sentry DSN and WebDriver address on the input ad hoc
// here, using the legacy env::var() method. This is a temporary solution
// until the /v0/bulk endpoints are removed.
let backend_name = env::var("RCH_BACKEND_NAME").unwrap_or_else(|_| "reacher".into());
let sentry_dsn = env::var("RCH_SENTRY_DSN");
let webdriver_addr =
env::var("RCH_WEBDRIVER_ADDR").unwrap_or_else(|_| "http://localhost:9515".into());
item.backend_name(backend_name);
item.sentry_dsn(sentry_dsn.ok());
item.webdriver_addr(webdriver_addr);

self.index += 1;
Some(item.build().unwrap())
} else {
Expand Down Expand Up @@ -161,25 +174,8 @@ pub async fn email_verification_task(
current_job.id(),
);

// Currently, for the legacy and deprecated /v0/bulk endpoints, we
// don't pass in a BackendConfig to the job. Therefore, we must create
// an ad-hoc ReacherConfig here, using the legacy env::var() method.
// This is a temporary solution until the /v0/bulk endpoints are
// removed.
let backend_name = env::var("RCH_BACKEND_NAME").unwrap_or_else(|_| "reacher".into());
let sentry_dsn = env::var("RCH_SENTRY_DSN");
let webdriver_addr =
env::var("RCH_WEBDRIVER_ADDR").unwrap_or_else(|_| "localhost:9515".into());
let config = ReacherConfig {
backend_name: backend_name.clone(),
webdriver_addr,
sentry: sentry_dsn
.ok()
.map(|dsn| SentryConfig { dsn, backend_name }),
};

let to_email = check_email_input.to_email.clone();
let response = check_email(&check_email_input, &config).await;
let response = check_email(&check_email_input).await;

debug!(
target: LOG_TARGET,
Expand Down
Loading

0 comments on commit f75e0c4

Please sign in to comment.