diff --git a/crates/ruff_linter/resources/test/fixtures/airflow/AIR301.py b/crates/ruff_linter/resources/test/fixtures/airflow/AIR301.py new file mode 100644 index 0000000000000..1841d3c192872 --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/airflow/AIR301.py @@ -0,0 +1,15 @@ +from airflow import DAG, dag + +DAG(dag_id="class_default_schedule") + +DAG(dag_id="class_schedule", schedule="@hourly") + + +@dag() +def decorator_default_schedule(): + pass + + +@dag(schedule="0 * * * *") +def decorator_schedule(): + pass diff --git a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs index dd03609194556..7c27f1a1f01d1 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs @@ -11,8 +11,8 @@ use ruff_text_size::Ranged; use crate::checkers::ast::Checker; use crate::registry::Rule; use crate::rules::{ - flake8_2020, flake8_async, flake8_bandit, flake8_boolean_trap, flake8_bugbear, flake8_builtins, - flake8_comprehensions, flake8_datetimez, flake8_debugger, flake8_django, + airflow, flake8_2020, flake8_async, flake8_bandit, flake8_boolean_trap, flake8_bugbear, + flake8_builtins, flake8_comprehensions, flake8_datetimez, flake8_debugger, flake8_django, flake8_future_annotations, flake8_gettext, flake8_implicit_str_concat, flake8_logging, flake8_logging_format, flake8_pie, flake8_print, flake8_pyi, flake8_pytest_style, flake8_self, flake8_simplify, flake8_tidy_imports, flake8_type_checking, flake8_use_pathlib, flynt, numpy, @@ -1070,6 +1070,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) { if checker.enabled(Rule::UnrawRePattern) { ruff::rules::unraw_re_pattern(checker, call); } + if checker.enabled(Rule::AirflowDagNoScheduleArgument) { + airflow::rules::dag_no_schedule_argument(checker, expr); + } } Expr::Dict(dict) => { if checker.any_enabled(&[ diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index 5dc33a1c36216..a14bc7e23a58d 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -1033,6 +1033,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { // airflow (Airflow, "001") => (RuleGroup::Stable, rules::airflow::rules::AirflowVariableNameTaskIdMismatch), + (Airflow, "301") => (RuleGroup::Preview, rules::airflow::rules::AirflowDagNoScheduleArgument), // perflint (Perflint, "101") => (RuleGroup::Stable, rules::perflint::rules::UnnecessaryListCast), diff --git a/crates/ruff_linter/src/rules/airflow/mod.rs b/crates/ruff_linter/src/rules/airflow/mod.rs index 34ba5e4c4a14a..4e4130766022f 100644 --- a/crates/ruff_linter/src/rules/airflow/mod.rs +++ b/crates/ruff_linter/src/rules/airflow/mod.rs @@ -13,6 +13,7 @@ mod tests { use crate::{assert_messages, settings}; #[test_case(Rule::AirflowVariableNameTaskIdMismatch, Path::new("AIR001.py"))] + #[test_case(Rule::AirflowDagNoScheduleArgument, Path::new("AIR301.py"))] fn rules(rule_code: Rule, path: &Path) -> Result<()> { let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy()); let diagnostics = test_path( diff --git a/crates/ruff_linter/src/rules/airflow/rules/dag_schedule_argument.rs b/crates/ruff_linter/src/rules/airflow/rules/dag_schedule_argument.rs new file mode 100644 index 0000000000000..50abf69304cda --- /dev/null +++ b/crates/ruff_linter/src/rules/airflow/rules/dag_schedule_argument.rs @@ -0,0 +1,84 @@ +use ruff_diagnostics::{Diagnostic, Violation}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::Expr; +use ruff_python_ast::{self as ast}; +use ruff_python_semantic::Modules; +use ruff_text_size::Ranged; + +use crate::checkers::ast::Checker; + +/// ## What it does +/// Checks for a `DAG()` class or `@dag()` decorator without an explicit +/// `schedule` parameter. +/// +/// ## Why is this bad? +/// The default `schedule` value on Airflow 2 is `timedelta(days=1)`, which is +/// almost never what a user is looking for. Airflow 3 changes this the default +/// to *None*, and would break existing DAGs using the implicit default. +/// +/// If your DAG does not have an explicit `schedule` argument, Airflow 2 +/// schedules a run for it every day (at the time determined by `start_date`). +/// Such a DAG will no longer be scheduled on Airflow 3 at all, without any +/// exceptions or other messages visible to the user. +/// +/// ## Example +/// ```python +/// from airflow import DAG +/// +/// +/// # Using the implicit default schedule. +/// dag = DAG(dag_id="my_dag") +/// ``` +/// +/// Use instead: +/// ```python +/// from datetime import timedelta +/// +/// from airflow import DAG +/// +/// +/// dag = DAG(dag_id="my_dag", schedule=timedelta(days=1)) +/// ``` +#[violation] +pub struct AirflowDagNoScheduleArgument; + +impl Violation for AirflowDagNoScheduleArgument { + #[derive_message_formats] + fn message(&self) -> String { + "DAG should have an explicit `schedule` argument".to_string() + } +} + +/// AIR301 +pub(crate) fn dag_no_schedule_argument(checker: &mut Checker, expr: &Expr) { + if !checker.semantic().seen_module(Modules::AIRFLOW) { + return; + } + + // Don't check non-call expressions. + let Expr::Call(ast::ExprCall { + func, arguments, .. + }) = expr + else { + return; + }; + + // We don't do anything unless this is a `DAG` (class) or `dag` (decorator + // function) from Airflow. + if !checker + .semantic() + .resolve_qualified_name(func) + .is_some_and(|qualname| matches!(qualname.segments(), ["airflow", .., "DAG" | "dag"])) + { + return; + } + + // If there's a `schedule` keyword argument, we are good. + if arguments.find_keyword("schedule").is_some() { + return; + } + + // Produce a diagnostic when the `schedule` keyword argument is not found. + let diagnostic = Diagnostic::new(AirflowDagNoScheduleArgument, expr.range()); + checker.diagnostics.push(diagnostic); +} diff --git a/crates/ruff_linter/src/rules/airflow/rules/mod.rs b/crates/ruff_linter/src/rules/airflow/rules/mod.rs index f9917250a865a..b01391092ca77 100644 --- a/crates/ruff_linter/src/rules/airflow/rules/mod.rs +++ b/crates/ruff_linter/src/rules/airflow/rules/mod.rs @@ -1,3 +1,5 @@ +pub(crate) use dag_schedule_argument::*; pub(crate) use task_variable_name::*; +mod dag_schedule_argument; mod task_variable_name; diff --git a/crates/ruff_linter/src/rules/airflow/snapshots/ruff_linter__rules__airflow__tests__AIR301_AIR301.py.snap b/crates/ruff_linter/src/rules/airflow/snapshots/ruff_linter__rules__airflow__tests__AIR301_AIR301.py.snap new file mode 100644 index 0000000000000..b2afb063a40c2 --- /dev/null +++ b/crates/ruff_linter/src/rules/airflow/snapshots/ruff_linter__rules__airflow__tests__AIR301_AIR301.py.snap @@ -0,0 +1,20 @@ +--- +source: crates/ruff_linter/src/rules/airflow/mod.rs +--- +AIR301.py:3:1: AIR301 DAG should have an explicit `schedule` argument + | +1 | from airflow import DAG, dag +2 | +3 | DAG(dag_id="class_default_schedule") + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ AIR301 +4 | +5 | DAG(dag_id="class_schedule", schedule="@hourly") + | + +AIR301.py:8:2: AIR301 DAG should have an explicit `schedule` argument + | + 8 | @dag() + | ^^^^^ AIR301 + 9 | def decorator_default_schedule(): +10 | pass + | diff --git a/crates/ruff_python_semantic/src/model.rs b/crates/ruff_python_semantic/src/model.rs index 2546637734501..8f3235d7b0715 100644 --- a/crates/ruff_python_semantic/src/model.rs +++ b/crates/ruff_python_semantic/src/model.rs @@ -1289,6 +1289,7 @@ impl<'a> SemanticModel<'a> { "typing" => self.seen.insert(Modules::TYPING), "typing_extensions" => self.seen.insert(Modules::TYPING_EXTENSIONS), "attr" | "attrs" => self.seen.insert(Modules::ATTRS), + "airflow" => self.seen.insert(Modules::AIRFLOW), _ => {} } } @@ -1860,6 +1861,7 @@ bitflags! { const FLASK = 1 << 24; const ATTRS = 1 << 25; const REGEX = 1 << 26; + const AIRFLOW = 1 << 27; } } diff --git a/ruff.schema.json b/ruff.schema.json index c70b5df565454..f45b698932d92 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -2794,6 +2794,9 @@ "AIR0", "AIR00", "AIR001", + "AIR3", + "AIR30", + "AIR301", "ALL", "ANN", "ANN0",