diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala index 01266eb2c85..4aaeaccd7c0 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala @@ -28,6 +28,7 @@ import org.slf4j.LoggerFactory import org.apache.kyuubi.plugin.spark.authz.OperationType.OperationType import org.apache.kyuubi.plugin.spark.authz.PrivilegeObjectActionType._ import org.apache.kyuubi.plugin.spark.authz.rule.Authorization._ +import org.apache.kyuubi.plugin.spark.authz.rule.plan.ChildOutputHolder import org.apache.kyuubi.plugin.spark.authz.rule.rowfilter._ import org.apache.kyuubi.plugin.spark.authz.serde._ import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ @@ -100,13 +101,18 @@ object PrivilegesBuilder { privilegeObjects += PrivilegeObject(table) case p => + val existsChildOutputHolder = p.exists(_.isInstanceOf[ChildOutputHolder]) for (child <- p.children) { // If current plan's references don't have relation to it's input, have two cases // 1. `MapInPandas`, `ScriptTransformation` // 2. `Project` output only have constant value if (columnPrune(p.references.toSeq ++ p.output, p.inputSet).isEmpty) { - // If plan is project and output don't have relation to input, can ignore. - if (!p.isInstanceOf[Project]) { + // 1. If plan is project and output don't have relation to input, can ignore. + // 2. If sub logic plan tree exists ChildOutputHolder node, it means that the output of + // some nodes in the tree is fixed by RuleChildOutputMarker in some special + // scenarios, such as the Aggregate(count(*)) child node. To avoid missing child node + // permissions, we need to continue checking down. + if (!p.isInstanceOf[Project] || existsChildOutputHolder) { buildQuery( child, privilegeObjects, diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtension.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtension.scala index 288719f07bf..8e61783b6a8 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtension.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtension.scala @@ -19,11 +19,12 @@ package org.apache.kyuubi.plugin.spark.authz.ranger import org.apache.spark.sql.SparkSessionExtensions -import org.apache.kyuubi.plugin.spark.authz.rule.{RuleEliminateMarker, RuleEliminatePermanentViewMarker, RuleEliminateTypeOf} +import org.apache.kyuubi.plugin.spark.authz.rule.{RuleEliminateChildOutputHolder, RuleEliminateMarker, RuleEliminatePermanentViewMarker, RuleEliminateTypeOf} import org.apache.kyuubi.plugin.spark.authz.rule.config.AuthzConfigurationChecker import org.apache.kyuubi.plugin.spark.authz.rule.datamasking.{RuleApplyDataMaskingStage0, RuleApplyDataMaskingStage1} import org.apache.kyuubi.plugin.spark.authz.rule.expression.RuleApplyTypeOfMarker import org.apache.kyuubi.plugin.spark.authz.rule.permanentview.RuleApplyPermanentViewMarker +import org.apache.kyuubi.plugin.spark.authz.rule.plan.RuleChildOutputMarker import org.apache.kyuubi.plugin.spark.authz.rule.rowfilter.{FilterDataSourceV2Strategy, RuleApplyRowFilter, RuleReplaceShowObjectCommands} /** @@ -48,6 +49,7 @@ class RangerSparkExtension extends (SparkSessionExtensions => Unit) { v1.injectResolutionRule(_ => RuleReplaceShowObjectCommands) v1.injectResolutionRule(_ => RuleApplyPermanentViewMarker) v1.injectResolutionRule(_ => RuleApplyTypeOfMarker) + v1.injectResolutionRule(_ => RuleChildOutputMarker) v1.injectResolutionRule(RuleApplyRowFilter) v1.injectResolutionRule(RuleApplyDataMaskingStage0) v1.injectResolutionRule(RuleApplyDataMaskingStage1) @@ -55,6 +57,7 @@ class RangerSparkExtension extends (SparkSessionExtensions => Unit) { v1.injectOptimizerRule(RuleAuthorization) v1.injectOptimizerRule(RuleEliminatePermanentViewMarker) v1.injectOptimizerRule(_ => RuleEliminateTypeOf) + v1.injectOptimizerRule(_ => RuleEliminateChildOutputHolder) v1.injectPlannerStrategy(FilterDataSourceV2Strategy) } } diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleEliminateChildOutputHolder.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleEliminateChildOutputHolder.scala new file mode 100644 index 00000000000..faa95a7b40e --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleEliminateChildOutputHolder.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.rule + +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule + +import org.apache.kyuubi.plugin.spark.authz.rule.plan.ChildOutputHolder + +/** + * Transforming down [[ChildOutputHolder]] + */ +object RuleEliminateChildOutputHolder extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = { + plan.transform { + case p @ ChildOutputHolder(child, _) => child + } + } +} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/plan/ChildOutputHolder.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/plan/ChildOutputHolder.scala new file mode 100644 index 00000000000..739cde12ab2 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/plan/ChildOutputHolder.scala @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.rule.plan + +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} + +import org.apache.kyuubi.plugin.spark.authz.util.WithInternalChild + +case class ChildOutputHolder(child: LogicalPlan, childOutput: Seq[Attribute]) + extends UnaryNode with WithInternalChild { + + val output: Seq[Attribute] = childOutput + + override def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = copy(child = newChild) +} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/plan/RuleChildOutputMarker.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/plan/RuleChildOutputMarker.scala new file mode 100644 index 00000000000..a4a6bee28f7 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/plan/RuleChildOutputMarker.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.rule.plan + +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.Rule + +/** + * Rule to add [[ChildOutputHolder]] used to fixed logic plan's child node's output, + * now used for following cases: + * + * 1. Aggregate(count(*)/count(1)), it's child node will be pruned in Spark optimizer + * rule [[org.apache.spark.sql.catalyst.optimizer.ColumnPruning]]. + */ +object RuleChildOutputMarker extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = { + plan.transform { + case a @ Aggregate(_, _, child) + if !child.isInstanceOf[ChildOutputHolder] && + child.outputSet.intersect(a.references).isEmpty => + a.copy(child = ChildOutputHolder(child, child.output)) + } + } +} diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala index 1fdea0ed969..e378c40bfc9 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala @@ -961,11 +961,36 @@ class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { |AS |SELECT count(*) as cnt, sum(id) as sum_id FROM $db1.$table1 """.stripMargin)) - checkAnswer(someone, s"SELECT count(*) FROM $db1.$table1", Row(0) :: Nil) - checkAnswer(someone, s"SELECT count(*) FROM $db1.$view1", Row(0) :: Nil) + checkAnswer(admin, s"SELECT count(*) FROM $db1.$table1", Row(0) :: Nil) - checkAnswer(someone, s"SELECT count(*) FROM $db1.$view2", Row(1) :: Nil) + checkAnswer(admin, s"SELECT count(*) FROM $db1.$view1", Row(0) :: Nil) + + checkAnswer(admin, s"SELECT count(*) FROM $db1.$view2", Row(1) :: Nil) + + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"SELECT count(*) FROM $db1.$table1").show()))( + s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/scope]") + + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"SELECT count(1) FROM $db1.$table1").show()))( + s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/scope]") + + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"SELECT count(*) FROM $db1.$view1").show()))( + s"does not have [select] privilege on [$db1/$view1/id,$db1/$view1/scope]") + + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"SELECT count(1) FROM $db1.$view1").show()))( + s"does not have [select] privilege on [$db1/$view1/id,$db1/$view1/scope]") + + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"SELECT count(*) FROM $db1.$view2").show()))( + s"does not have [select] privilege on [$db1/$view2/cnt,$db1/$view2/sum_id]") + + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"SELECT count(1) FROM $db1.$view2").show()))( + s"does not have [select] privilege on [$db1/$view2/cnt,$db1/$view2/sum_id]") interceptEndsWith[AccessControlException]( doAs(someone, sql(s"SELECT count(id) FROM $db1.$table1 WHERE id > 10").show()))( @@ -1500,13 +1525,33 @@ class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { doAs(admin, sql(s"CREATE TABLE IF NOT EXISTS $db1.$table1 (id int, scope int)")) doAs(admin, sql(s"CREATE VIEW $db1.$view1 AS SELECT * FROM $db1.$table1")) checkAnswer( - someone, + admin, s"SELECT count(*) FROM $db1.$table1 WHERE id > 1", Row(0) :: Nil) checkAnswer( - someone, + admin, s"SELECT count(*) FROM $db1.$view1 WHERE id > 1", Row(0) :: Nil) + interceptContains[AccessControlException]( + doAs( + someone, + sql(s"SELECT count(*) FROM $db1.$table1 WHERE id > 1").collect()))( + s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/scope]") + interceptContains[AccessControlException]( + doAs( + someone, + sql(s"SELECT count(1) FROM $db1.$table1 WHERE id > 1").collect()))( + s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/scope]") + interceptContains[AccessControlException]( + doAs( + someone, + sql(s"SELECT count(*) FROM $db1.$view1 WHERE id > 1").collect()))( + s"does not have [select] privilege on [$db1/$view1/id,$db1/$view1/scope]") + interceptContains[AccessControlException]( + doAs( + someone, + sql(s"SELECT count(1) FROM $db1.$view1 WHERE id > 1").collect()))( + s"does not have [select] privilege on [$db1/$view1/id,$db1/$view1/scope]") interceptContains[AccessControlException]( doAs( someone, @@ -1542,4 +1587,57 @@ class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { } } } + + test("select count(*)/count(1) should not ignore privileges") { + val db1 = defaultDb + val table1 = "table1" + withSingleCallEnabled { + withCleanTmpResources(Seq((s"$db1.$table1", "table"))) { + doAs( + admin, + sql( + s"""CREATE TABLE IF NOT EXISTS $db1.$table1 + |(id int, scope int, part string) + |PARTITIONED BY(part) + |""".stripMargin)) + + interceptContains[AccessControlException]( + doAs(someone, sql(s"select count(*) from $db1.$table1").show()))( + s"does not have [select] privilege on " + + s"[$db1/$table1/id,$db1/$table1/scope,$db1/$table1/part]") + + interceptContains[AccessControlException]( + doAs(someone, sql(s"select count(id) from $db1.$table1").show()))( + s"does not have [select] privilege on [$db1/$table1/id]") + + interceptContains[AccessControlException]( + doAs( + someone, + sql( + s"""select count(1) from ( + | select id, part from $db1.$table1 + |) t where part = 'part-1' + |""".stripMargin).show()))( + s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/part]") + + interceptContains[AccessControlException]( + doAs( + someone, + sql( + s"""select cnt from ( + | select count(1) as cnt from ( + | select id from $db1.$table1 where part = 'part-1' + | ) t1 + |) t2 + |""".stripMargin).show()))( + s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/part]") + + // df.count() same with select count(*) + interceptContains[AccessControlException]( + doAs(someone, sql(s"select id from $db1.$table1 where part = 'part-1'").toDF().count()))( + s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/part]") + } + + } + } }