supporting Kerberos Authentication
ssshow16 committed Nov 20, 2014
1 parent 27b88e5 commit 191e16e
Showing 17 changed files with 605 additions and 491 deletions.
2 changes: 1 addition & 1 deletion RHive/DESCRIPTION
@@ -1,7 +1,7 @@
Package: RHive
Type: Package
Title: R and Hive
Version: 2.0-0.5
Version: 2.0-0.6
Description: RHive is an R extension facilitating
distributed computing via HIVE query.
It provides an easy to use HQL like SQL
13 changes: 11 additions & 2 deletions RHive/R/api.R
@@ -82,9 +82,9 @@ rhive.env <- function(ALL=FALSE) {
)
}

rhive.connect <- function(host="127.0.0.1", port=10000, hiveServer2=NA, defaultFS=NULL, updateJar=FALSE, user=NULL, password=NULL) {
rhive.connect <- function(host="127.0.0.1", port=10000, hiveServer2=NA, defaultFS=NULL, updateJar=FALSE, user=NULL, password=NULL, db="default", properties = character(0)) {
tryCatch ( {
.rhive.connect(host=host, port=port, hiveServer2=hiveServer2, defaultFS=defaultFS, updateJar=updateJar, user=user, password=password)
.rhive.connect(host=host, port=port, hiveServer2=hiveServer2, defaultFS=defaultFS, updateJar=updateJar, user=user, password=password,db,properties)
}, error=function(e) {
.handleErr(e)
}
@@ -516,3 +516,12 @@ rhive.hdfs.info <- function(path) {
}
)
}

rhive.login <- function(keytab,principal,hostname) {
tryCatch ( {
.rhive.login(keytab=keytab,principal=principal,hostname=hostname)
}, error=function(e) {
.handleErr(e)
}
)
}
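
A minimal usage sketch of the extended API above, assuming rhive.login() is meant to be called before rhive.connect(); the keytab path, principal, and host names are placeholders, not values from this commit, and .rhive.login (which rhive.login wraps) is defined in a changed file whose diff is not shown here.

# Usage sketch -- all values below are placeholders.
library(RHive)
rhive.init(hiveHome="/opt/hive", hadoopHome="/opt/hadoop", hadoopConf="/etc/hadoop")

# Log in from a keytab before connecting.
rhive.login(keytab="/etc/security/keytabs/hive.keytab",
            principal="hive/gateway.example.com@EXAMPLE.COM",
            hostname="gateway.example.com")

# Connect, selecting a database and optionally passing extra "key=value" properties.
rhive.connect(host="hiveserver.example.com", port=10000, hiveServer2=TRUE,
              db="default", properties=character(0))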
10 changes: 10 additions & 0 deletions RHive/R/hdfs.R
@@ -19,6 +19,10 @@
}

FSUtils <- .j2r.FSUtils()

AuthUtils <- .j2r.AuthUtils()
AuthUtils$setConfigurationUserGroup(FSUtils$getConf())

ok <- FSUtils$checkFileSystem(defaultFS)
if (!ok) {
stop(sprintf("Failed to connect to %s.", defaultFS))
@@ -330,3 +334,9 @@
unlink(files)
}
}

.rhive.hdfs.login <- function(){
j.auth.properties <- .getEnv("AUTH_PROPERTIES")
AuthUtils <- .j2r.AuthUtils()
AuthUtils$loginUserFromKeytab(j.auth.properties,"hdfs")
}
9 changes: 9 additions & 0 deletions RHive/R/j2r.R
@@ -19,6 +19,10 @@
FSUtils$getConf()
}

.j2r.Properties <- function() {
.jnew("java/util/Properties")
}

.j2r.EnvUtils <- function() {
J("com/nexr/rhive/util/EnvUtils")
}
@@ -36,9 +40,14 @@
}

.j2r.FSUtils <- function() {
.rhive.hdfs.login()
J("com/nexr/rhive/hadoop/FSUtils")
}

.j2r.AuthUtils <- function() {
J("com/nexr/rhive/hadoop/AuthUtils")
}

.j2r.System <- function() {
J("java/lang/System")
}
62 changes: 43 additions & 19 deletions RHive/R/rhive.R
@@ -94,7 +94,7 @@
.jinit(classpath=cp, parameters=getOption("java.parameters"))
}

.rhive.connect <- function(host="127.0.0.1", port=10000, hiveServer2=NA, defaultFS=NULL, updateJar=FALSE, user=NULL, password=NULL) {
.rhive.connect <- function(host="127.0.0.1", port=10000, hiveServer2=NA, defaultFS=NULL, updateJar=FALSE, user=NULL, password=NULL, db="default", properties = character(0)) {

initialized <- .getEnv("INITIALIZED")
if (is.null(.getEnv("HIVE_HOME")) || is.null(.getEnv("HADOOP_HOME"))) {
@@ -121,19 +121,21 @@
userName <- EnvUtils$getUserName()
userHome <- EnvUtils$getUserHome()
tmpDir <- EnvUtils$getTempDirectory()
j.properties <- .auth.properties(user, password, properties)

.setEnv("USERNAME", userName)
.setEnv("HOME", userHome)
.setEnv("TMP_DIR", tmpDir)

.setEnv("AUTH_PROPERTIES", j.properties)

System <- .j2r.System()
System$setProperty("RHIVE_UDF_DIR", .FS_UDF_DIR())
System$setProperty("RHIVE_UDF_DIR", .FS_UDF_DIR())
System$setProperty("HADOOP_CONF_DIR", .HADOOP_CONF_DIR())

if (is.null(defaultFS)) {
defaultFS <- .DEFAULT_FS()
}

.rhive.hdfs.connect(defaultFS)
.copyJarsToHdfs(updateJar)

@@ -142,7 +144,7 @@
}

hiveClient <- .j2r.HiveJdbcClient(hiveServer2)
hiveClient$connect(host, as.integer(port), user, password)
hiveClient$connect(host, as.integer(port), db, user,password, j.properties)
hiveClient$addJar(.FS_JAR_PATH())

.registerUDFs(hiveClient)
@@ -151,9 +153,31 @@
.setEnv("hiveClient", hiveClient)

.makeBaseDirs()

if (db != "default") {
.rhive.use.database(db)
}
}
}

.auth.properties <- function(user, password, properties) {
#.make.j.properties <- function(properties) {
j.properties <- .j2r.Properties()
if (!is.empty(user)) {
j.properties$setProperty("user", user)
}

if (!is.empty(password)) {
j.properties$setProperty("password", password)
}

if (!is.empty(properties)) {
l <- lapply(strsplit(properties, split = "="), function(x) { gsub("^\\s+|\\s+$", "", x) })
lapply(l, function(p) { if (length(p) == 2) { j.properties$setProperty(p[1], p[2]) } })
}
return(j.properties)
}

.copyJarsToHdfs <- function(updateJar) {
jar <- paste(system.file(package="RHive"), "java", "rhive_udf.jar", sep=.Platform$file.sep)

@@ -207,28 +231,28 @@

.makeBaseDirs <- function() {
if (!.rhive.hdfs.exists(.FS_BASE_DATA_DIR())) {
.dfs.mkdir(.FS_BASE_DATA_DIR())
.dfs.chmod("777", .FS_BASE_DATA_DIR())
.rhive.hdfs.mkdirs(.FS_BASE_DATA_DIR())
.rhive.hdfs.chmod("777",.FS_BASE_DATA_DIR())
}

if (!.rhive.hdfs.exists(.FS_BASE_UDF_DIR())) {
.dfs.mkdir(.FS_BASE_UDF_DIR())
.dfs.chmod("777", .FS_BASE_UDF_DIR())
.rhive.hdfs.mkdirs(.FS_BASE_UDF_DIR())
.rhive.hdfs.chmod("777",.FS_BASE_UDF_DIR())
}

if (!.rhive.hdfs.exists(.FS_BASE_TMP_DIR())) {
.dfs.mkdir(.FS_BASE_TMP_DIR())
.dfs.chmod("777", .FS_BASE_TMP_DIR())
.rhive.hdfs.mkdirs(.FS_BASE_TMP_DIR())
.rhive.hdfs.chmod("777",.FS_BASE_TMP_DIR())
}

if(!.rhive.hdfs.exists(.FS_TMP_DIR())){
.dfs.mkdir(.FS_TMP_DIR())
.dfs.chmod("777", .FS_TMP_DIR())
.rhive.hdfs.mkdirs(.FS_TMP_DIR())
.rhive.hdfs.chmod("777",.FS_TMP_DIR())
}

if (!.rhive.hdfs.exists(.FS_BASE_MR_SCRIPT_DIR())) {
.dfs.mkdir(.FS_BASE_MR_SCRIPT_DIR())
.dfs.chmod("777", .FS_BASE_MR_SCRIPT_DIR())
.rhive.hdfs.mkdirs(.FS_BASE_MR_SCRIPT_DIR())
.rhive.hdfs.chmod("777",.FS_BASE_MR_SCRIPT_DIR())
}
}

@@ -519,11 +543,11 @@

recs <- unlist(strsplit(desc, split = "\n", fixed = TRUE))

l <- list(colname = character(0), type = character(0), comment = character(0))
l <- list(col_name = character(0), data_type = character(0), comment = character(0))
for (i in seq_along(recs)) {
v <- unlist(strsplit(recs[i], split = "\\s+"))
l$colname[i] <- v[1]
l$type[i] <- v[2]
l$col_name[i] <- v[1]
l$data_type[i] <- v[2]
## support hive (>= 0.13.# )
if (length(v) >= 3) {
l$comment[i] <- v[3]
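
The properties argument threaded through the new rhive.R code above is a character vector of "key=value" strings: .auth.properties() trims the whitespace around each key and value and copies them, together with user and password when present, into a java.util.Properties object. A small illustration, calling the internal helper directly with hypothetical keys:

# Illustration only -- .auth.properties() is internal and the keys are hypothetical.
props <- .auth.properties(user="bob", password=NULL,
                          properties=c(" hdfs.principal = hdfs/nn.example.com@EXAMPLE.COM ",
                                       "hdfs.keytab=/etc/security/keytabs/hdfs.keytab"))
# props now carries user, hdfs.principal and hdfs.keytab (whitespace trimmed);
# any string that does not split into exactly two parts on "=" is silently skipped.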
8 changes: 8 additions & 0 deletions RHive/R/util.R
@@ -13,3 +13,11 @@
# limitations under the License.

trim <- function (x) gsub("^\\s+|\\s+$", "", x)

is.empty <- function(val) {
if (is.null(val) || is.na(val) || length(val) == 0) {
return(TRUE)
} else {
return(FALSE)
}
}
4 changes: 2 additions & 2 deletions RHive/inst/javasrc/build.num
@@ -1,3 +1,3 @@
#Build Number for ANT. Do not edit!
#Mon Nov 10 10:43:12 KST 2014
build.number=334
#Thu Nov 20 17:11:32 KST 2014
build.number=359
30 changes: 30 additions & 0 deletions RHive/inst/javasrc/src/com/nexr/rhive/hadoop/AuthUtils.java
@@ -0,0 +1,30 @@
package com.nexr.rhive.hadoop;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;

import java.io.IOException;
import java.util.Properties;

/**
* Created by bruceshin on 11/19/14.
*/
public class AuthUtils {

public static void loginUserFromKeytab(Properties properties, String service) throws IOException{

String principal = properties.getProperty(service + ".principal");
String keytab = properties.getProperty(service + ".keytab");

if(StringUtils.isEmpty(principal) || StringUtils.isEmpty(keytab)){
return;
}

UserGroupInformation.loginUserFromKeytab(principal, keytab);
}

public static void setConfigurationUserGroup(Configuration conf){
UserGroupInformation.setConfiguration(conf);
}
}
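
Tying the pieces together: during rhive.connect(), .rhive.hdfs.login() (hdfs.R above) hands the stored AUTH_PROPERTIES to AuthUtils.loginUserFromKeytab(properties, "hdfs"), so a keytab login is attempted only when both hdfs.principal and hdfs.keytab are present. A hedged sketch of supplying those keys follows; the values are placeholders, and the property keys consumed on the HiveServer2 JDBC side live in HiveJdbcClient, whose diff is not shown here.

# Keys follow the "<service>.principal" / "<service>.keytab" convention read by
# AuthUtils.loginUserFromKeytab(); all values are placeholders.
rhive.connect(host="hiveserver.example.com", port=10000, hiveServer2=TRUE,
              properties=c("hdfs.principal=hdfs/namenode.example.com@EXAMPLE.COM",
                           "hdfs.keytab=/etc/security/keytabs/hdfs.keytab"))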
(Diffs for the remaining changed files are not shown.)

2 comments on commit 191e16e

@ranjana-altiscale


I see that the code for Kerberos has been merged here. It would be nice to have a summary of how to actually connect to HS2 when Kerberos authentication is enabled.

@ranjana-altiscale


I tried the following:

R
library(RHive)
rhive.init(hiveHome="/opt/hive", hiveLib="/opt/hive/lib:/rhive/lib/2.0-0.2", hadoopHome="/opt/hadoop", hadoopConf="/etc/hadoop", hadoopLib="/opt/hadoop/lib", verbose=FALSE)
rhive.login(keytab=NULL, principal="hiveserver/[email protected]", hostname="hiveserver-XXXXXX.altiscale.com")
Error: could not find function "rhive.login"
rhive.connect("hiveserver-ranjana-ci-XXXXXXXX.altiscale.com",10000, principal="hiveserver/[email protected]")
Error in rhive.connect("hiveserver-ranjana-ci-XXXXXXXXX.com", :
unused argument (principal = "hiveserver/[email protected]")
