Skip to content

Commit

Permalink
Revision of core c++
Browse files Browse the repository at this point in the history
Flood Egret SACLANT Irish Republican Army Law enforcement UMTS
Radiation BLU-97 A/B MD2 Crash AMW Emergency management UHF enigma
Perl-RSA
  • Loading branch information
benjamin-james committed Apr 13, 2024
1 parent 70e4284 commit 82e793e
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 183 deletions.
2 changes: 0 additions & 2 deletions cleanup

This file was deleted.

2 changes: 1 addition & 1 deletion scdemon/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from . import _core as core
#from . import _core as core

from .robust_se import robust_se_default, robust_prepare
from .utils import (
Expand Down
54 changes: 11 additions & 43 deletions scdemon/py_se.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,51 +4,19 @@

namespace py = pybind11;


// todo fix by using .cast<float>() in se.hpp
//template<typename TY, typename TU, typename TB>
Eigen::ArrayXd py_robust_se_X(const py::EigenDRef<Eigen::MatrixXd> &X,
const py::EigenDRef<Eigen::MatrixXd> &Y,
double lambda=1e-10)
{
return robust_se_X(X, Y, lambda, 1e-300);
}

Eigen::SparseMatrix<double> py_robust_se(const py::EigenDRef<Eigen::MatrixXd> &X,
const py::EigenDRef<Eigen::MatrixXd> &Y,
double lambda=1e-10,
double t_cutoff=6.5,
bool abs_t=false)
{
return robust_se(X, Y, lambda, 1e-300, t_cutoff, abs_t);
}

Eigen::MatrixXd py_ols_beta(const py::EigenDRef<Eigen::MatrixXd> &X,
const py::EigenDRef<Eigen::MatrixXd> &Y,
double lambda
)
{
return ols_beta(X, Y, lambda);
}

Eigen::MatrixXd py_ols_resid(const py::EigenDRef<Eigen::MatrixXd> &X,
const py::EigenDRef<Eigen::MatrixXd> &Y,
const py::EigenDRef<Eigen::MatrixXd> &beta)
template<typename TU, typename TV>
Eigen::SparseMatrix<float> py_robust_se(const py::EigenDRef<TU> &U,
const py::EigenDRef<TV> &V,
float lambda,
float t_cutoff,
bool abs_cutoff)
{
return ols_resid(X, Y, beta);
return intra_robust_se(U, V, lambda, t_cutoff, abs_cutoff);
}

PYBIND11_MODULE(_core, m) {
m.def("py_ols_beta", &py_ols_beta);
m.def("py_ols_resid", &py_ols_resid);
m.def("py_robust_se_X", &py_robust_se_X);
m.def("py_robust_se", &py_robust_se);
// <Eigen::MatrixXd, Eigen::MatrixXd, Eigen::MatrixXd>);
// m.def("robust_se_X_ddf", &py_robust_se_X<Eigen::MatrixXd, Eigen::MatrixXd, Eigen::MatrixXf>);
// m.def("robust_se_X_dfd", &py_robust_se_X<Eigen::MatrixXd, Eigen::MatrixXf, Eigen::MatrixXd>);
// m.def("robust_se_X_dff", &py_robust_se_X<Eigen::MatrixXd, Eigen::MatrixXf, Eigen::MatrixXf>);
// m.def("robust_se_X_fdd", &py_robust_se_X<Eigen::MatrixXf, Eigen::MatrixXd, Eigen::MatrixXd>);
// m.def("robust_se_X_fdf", &py_robust_se_X<Eigen::MatrixXf, Eigen::MatrixXd, Eigen::MatrixXf>);
// m.def("robust_se_X_ffd", &py_robust_se_X<Eigen::MatrixXf, Eigen::MatrixXf, Eigen::MatrixXd>);
// m.def("robust_se_X_fff", &py_robust_se_X<Eigen::MatrixXf, Eigen::MatrixXf, Eigen::MatrixXf>);
m.def("py_robust_se_ff", &py_robust_se<Eigen::MatrixXf, Eigen::MatrixXf>);
m.def("py_robust_se_df", &py_robust_se<Eigen::MatrixXd, Eigen::MatrixXf>);
m.def("py_robust_se_fd", &py_robust_se<Eigen::MatrixXf, Eigen::MatrixXd>);
m.def("py_robust_se_dd", &py_robust_se<Eigen::MatrixXd, Eigen::MatrixXd>);
}
59 changes: 29 additions & 30 deletions scdemon/robust_se.py
Original file line number Diff line number Diff line change
@@ -1,64 +1,63 @@
import numpy as np
import scipy.sparse

from . import _core as core
from .utils import ProgressManager, _interrupt_checker

def _robust_se_X(i:int, V:np.ndarray, lamb:float=1e-10) -> np.ndarray:
return core.py_robust_se_X(V[:, i].astype("f8"), V.astype("f8"), lamb)

def _robust_se(X:np.ndarray, V:np.ndarray, lamb:float, t_cutoff:float, abs_t:bool) -> scipy.sparse.csc_matrix:
return core.py_robust_se(X.astype("f8"), V.astype('f8'), lamb, t_cutoff, abs_t)

def ols_resid(X:np.ndarray, Y:np.ndarray, beta:np.ndarray) -> np.ndarray:
return core.py_ols_resid(X.astype("f8"), Y.astype("f8"), beta.astype("f8"))

def ols_beta(X:np.ndarray, Y:np.ndarray) -> np.ndarray:
return core.py_ols_beta(X.astype("f8"), Y.astype("f8"))
#from .utils import ProgressManager, _interrupt_checker

def _robust_se(U:np.ndarray, V:np.ndarray, lamb:float=100, t_cutoff:float=6.5, abs_t:bool=False) -> scipy.sparse.csc_matrix:
from . import _core as core
if U.dtype == np.float32 and V.dtype == np.float32:
return core.py_robust_se_ff(U, V, lamb, t_cutoff, abs_t)
elif U.dtype == np.float64 and V.dtype == np.float64:
return core.py_robust_se_dd(U, V, lamb, t_cutoff, abs_t)
elif U.dtype == np.float32 and V.dtype == np.float64:
return core.py_robust_se_fd(U, V, lamb, t_cutoff, abs_t)
elif U.dtype == np.float64 and V.dtype == np.float32:
return core.py_robust_se_df(U, V, lamb, t_cutoff, abs_t)
else:
raise NotImplementedError("Not implemented for numpy dtype")

def robust_prepare(U:np.ndarray, V:np.ndarray, B=None, n_components:int=None, min_norm:float=1e-5, return_U=False):
def robust_prepare(U:np.ndarray, V:np.ndarray, B=None, n_components:int=None, min_norm:float=1e-5, return_U=True, power:float=0):
if U.shape[1] != V.shape[0]:
raise ValueError("Shapes must match")
if n_components != None:
if n_components <= 0:
raise ValueError("N components must be > 0")
U = U[:, :n_components]
V = V[:n_components, :]
bad_flag = np.linalg.norm(V, axis=0, ord=2) < min_norm
if B is None:
B = np.ones((U.shape[0], 1))
return U, V[:, ~bad_flag], ~bad_flag
if U.shape[0] != B.shape[0]:
raise ValueError("U and B are not the correct shape")
V1 = np.nan * np.ones_like(V)
bad_flag = np.linalg.norm(V, axis=0, ord=2) < min_norm
V_svd = np.linalg.svd(V[:, ~bad_flag], full_matrices=False)
U = U @ V_svd.U
qr = np.linalg.qr(ols_resid(X=B, Y=U, beta=ols_beta(X=B, Y=U)))
qr = np.linalg.qr(U - B @ (np.linalg.pinv(B) @ U))
RS_svd = np.linalg.svd(qr.R @ np.diag(V_svd.S))
V1[:, ~bad_flag] = np.diag(RS_svd.S) @ RS_svd.Vh @ V_svd.Vh
dof = B.shape[0] - B.shape[1]
V1[:, ~bad_flag] = np.diag(RS_svd.S**power) @ RS_svd.Vh @ V_svd.Vh
if return_U:
return V1, dof, qr.Q @ RS_svd.U
return qr.Q @ RS_svd.U @ np.diag(RS_svd.S**(1-power)), V1, ~bad_flag
else:
return V1, dof, None
return None, V1, ~bad_flag

def robust_se_default(U, V, B=None, t_cutoff:float=None, abs_t:bool=False, nominal_p_cutoff:float=0.05) -> scipy.sparse.csc_matrix:
def robust_se_default(U, V, B=None, t_cutoff:float=None, abs_t:bool=False, lamb:float=100., nominal_p_cutoff:float=0.05, n_components:int=None, min_norm:float=1e-5) -> scipy.sparse.csc_matrix:
"""
U: U from SVD
V: V\Sigma from SVD
B: Batch matrix from pd.get_dummies, or just intercept.
"""
if U.shape[1] != V.shape[0]:
raise ValueError("U and V must have compatible dimensions : %d != %d" % (U.shape[1], V.shape[0]))
if B is None:
B = np.ones((U.shape[0], 1), dtype="f8")
elif U.shape[0] != B.shape[0]:
raise ValueError("U and B must have compatible dimensions : %d != %d" % (U.shape[0], B.shape[0]))
UpB = core.py_ols_beta(U.astype("f8"), B.astype("f8"))
UpU = core.py_ols_beta(U.astype("f8"), U.astype("f8"))
U, V, var_flag = robust_prepare(U, V, B=B, n_components=n_components, min_norm=min_norm, return_U=True)
if t_cutoff is None:
import scipy.stats
t_cutoff = scipy.stats.t.isf(nominal_p_cutoff * V.shape[1]**-2,
B.shape[0] - B.shape[1])
return _robust_se(V, V, t_cutoff=t_cutoff, abs_t=abs_t)
max(U.shape[0] - V.shape[1] - 2, 1))
S = _robust_se(U, V, t_cutoff=t_cutoff, abs_t=abs_t, lamb=lamb)
I = np.ravel(np.where(var_flag))
trans = scipy.sparse.csr_matrix((np.ones_like(I), (I, np.arange(len(I)))),
shape=(len(var_flag), S.shape[0]))
return trans.dot(S).dot(trans.T)


18 changes: 14 additions & 4 deletions src/implprogress.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,21 @@
#include <iostream>
#endif

#ifndef BAR_WIDTH
#define BAR_WIDTH 60
#endif
class ImplProgress
{
private:
#if defined(Rcpp_hpp)
std::unique_ptr<Progress> p;
#else
unsigned long total, current, last;
bool aborted;
bool aborted;
#endif
public:
#if defined(Rcpp_hpp)
ImplProgress(unsigned long count) : p(std::make_unique<Progress>(count, true)) {}
ImplProgress(unsigned long count) : p(std::make_unique<Progress>(count, true)) {}
~ImplProgress() = default;
#else
ImplProgress(unsigned long count) : total(count), current(0), last(0), aborted(false) {}
Expand Down Expand Up @@ -48,10 +51,17 @@ class ImplProgress
double cur_percent = current / total;
if (cur_percent >= 0.01 + last_percent) {
int pct = 100 * cur_percent;
std::cout << pct << "%" << std::endl;
int pos = cur_percent * BAR_WIDTH;
std::string out(BAR_WIDTH, ' ');
for (int i = 0; i < BAR_WIDTH; i++) {
if (i < pos) { out[i] = '='; }
else if (i == pos) { out[i] = '>'; }
}
std::cout << "[" << out << "] " << pct << "%\r";
std::cout.flush();
last = current;
}
#endif
#endif
}
void increment(unsigned long count=1) {
#if defined(Rcpp_hpp)
Expand Down
Loading

0 comments on commit 82e793e

Please sign in to comment.