diff --git a/.gitmodules b/.gitmodules index 749f742..f73554d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "FLAM"] path = chunkie/FLAM - url = https://github.com/klho/FLAM.git + url = https://github.com/fastalgorithms/FLAM.git [submodule "chunkie/fmm2d"] path = chunkie/fmm2d diff --git a/chunkie/+chnk/+flam/nproxy_square.m b/chunkie/+chnk/+flam/nproxy_square.m index 9b29eb4..448ed6b 100644 --- a/chunkie/+chnk/+flam/nproxy_square.m +++ b/chunkie/+chnk/+flam/nproxy_square.m @@ -40,60 +40,41 @@ srcinfo.d2 = randn(2,nsrc); srcinfo.n = [-srcinfo.d(2,:);srcinfo.d(1,:)] ./ sqrt(sum(srcinfo.d.^2,1)); - npxy = 16; + % strengths of random sources + sig = randn(nsrc,1); - % double until you get enough - for i = 1:11 - [pr,ptau] = chnk.flam.proxy_square_pts(npxy); - targinfo.r = pr*width; + npxy = 64; + + % double until you get enough + last_intgrl = NaN; + one_more = true; + for i = 0:14 + [pr,ptau,pw] = chnk.flam.proxy_square_pts(npxy); + targinfo.r = width*pr; targinfo.d = ptau; targinfo.d2 = zeros(2,npxy); targinfo.n = [-ptau(2,:);ptau(1,:)] ./ sqrt(sum(ptau.^2,1)); - if npxy > nsrc - nsrc = ceil(1.2*npxy); - srcinfo = []; - srcinfo.r = [-0.5;-0.5]*width + rand(2,nsrc)*width; - - srcinfo.d = randn(2,nsrc); - srcinfo.d2 = randn(2,nsrc); - srcinfo.n = [-srcinfo.d(2,:);srcinfo.d(1,:)] ./ sqrt(sum(srcinfo.d.^2,1)); - end - - mat = proxy_square_mat(kern,srcinfo,targinfo); - - [sk,~] = id(mat,rank_or_tol); - - if length(sk) < npxy - npxy = floor((length(sk)-1)/4+1.1)*4; + % compute integral along proxy surface of potential from random sources + % to see if we've resolved the kernel + intgrl = pw' * kern(srcinfo,targinfo) * sig; + + % check self convergence + err = abs(intgrl - last_intgrl) / abs(intgrl); + if err < rank_or_tol || ~one_more return; end npxy = 2*npxy; - end - - % npxy is never smaller than nsrc... so no compression is happening... - npxy = -1; - -end + last_intgrl = intgrl; -function mat = proxy_square_mat(kern,srcinfo,targinfo) - - if numel(kern) == 1 - mat = kern(srcinfo,targinfo); - return; - end - - % kern is a cellmat - - [m,n] = size(kern); - - mat = cell(m,n); - - for i=1:m - for j=1:n - mat{i,j} = kern{i,j}(srcinfo,targinfo); + % if using very low tolerance like 1e-14, just get to 1e-12 error + % and double one more time to avoid mucking around at machine eps + if rank_or_tol < 1e-12 && err < 1e-12 + one_more = false; end end - mat = cell2mat(mat); -end + % kernel was not resolved even with 2^14 points, report failure + npxy = -1; + +end \ No newline at end of file diff --git a/chunkie/+chnk/+flam/proxy_square_pts.m b/chunkie/+chnk/+flam/proxy_square_pts.m index 3f1d362..1f12097 100644 --- a/chunkie/+chnk/+flam/proxy_square_pts.m +++ b/chunkie/+chnk/+flam/proxy_square_pts.m @@ -11,7 +11,6 @@ % pr - (2,porder) array of proxy point locations (centered at 0 for a % box of side length 1) - if nargin < 1 porder = 64; end @@ -20,11 +19,10 @@ opts = []; end -iflege = 0; +iflege = 1; if isfield(opts, 'iflege') iflege = opts.iflege; end - assert(mod(porder,4) == 0, ... 'number of proxy points on square should be multiple of 4') @@ -33,21 +31,26 @@ if ~iflege pts = -1.5+3*(0:(po4-1))*1.0/po4; + one = ones(size(pts)); pw = ones(porder,1)*(4.0*3.0/porder); else - [xleg, wleg] = lege.exps(po4); - pts = xleg.'*1.5; - wleg = wleg*1.5; - pw = [wleg; wleg; wleg; wleg]; + % generate panel Gauss-Legendre rule on [-1.5, 1.5] + % because large single-panel rules lose digits + k = min(16, po4); + npanel = po4/k; + [pts, wts] = lege.exps(k); + panels = linspace(-1.5, 1.5, npanel + 1); + pts = reshape(panels(1:end-1) + 3/(2*npanel) * (pts + 1), 1, []); + wts = 3/(2*npanel) * repmat(wts, npanel, 1)'; + one = ones(1,porder/4); + pw = repmat(wts', 4, 1); end -one = ones(size(pts)); - pr = [pts, one*1.5, -pts, -1.5*one; - -1.5*one, pts, 1.5*one, -pts]; + -1.5*one, pts, 1.5*one, -pts]; ptau = [one, one*0, -one, one*0; - one*0, one, one*0, -one]; + one*0, one, one*0, -one]; pin = @(x) max(abs(x),[],1) < 1.5; diff --git a/chunkie/+chnk/chunkerkerneval_smooth.m b/chunkie/+chnk/chunkerkerneval_smooth.m index a6ad45a..14241bb 100644 --- a/chunkie/+chnk/chunkerkerneval_smooth.m +++ b/chunkie/+chnk/chunkerkerneval_smooth.m @@ -166,11 +166,20 @@ npxy = chnk.flam.nproxy_square(kerneval,width); [pr,ptau,pw,pin] = chnk.flam.proxy_square_pts(npxy); - pxyfun = @(rc,rx,cx,slf,nbr,l,ctr) chnk.flam.proxyfunr(rc,rx,slf,nbr,l, ... - ctr,chnkr,kerneval,opdims,pr,ptau,pw,pin); + verb = false; % TODO: make this an option to chunkerkerneval? + optsnpxy = []; optsnpxy.rank_or_tol = opts.eps; + pxyfun = @(lvl) proxyfunrbylevel(width,lvl,optsnpxy, ... + chnkr,wts,kerneval,opdims,verb && opts.proxybylevel); + if ~opts.proxybylevel + % if not using proxy-by-level, always use pxyfunr from level 1 + pxyfun = pxyfun(1); + end - optsifmm=[]; optsifmm.Tmax=Inf; - F = ifmm(matfun,targinfo_flam.r,xflam1,200,1e-14,pxyfun,optsifmm); + optsifmm=[]; + optsifmm.Tmax=Inf; + optsifmm.proxybylevel = opts.proxybylevel; + optsifmm.verb = verb; + F = ifmm(matfun,targsflam,xflam1,200,1e-14,pxyfun,optsifmm); fints = ifmm_mv(F,dens(:),matfun); fints = fints(:); else @@ -224,3 +233,16 @@ end % sum(fints) end + +function pxyfunrlvl = proxyfunrbylevel(width,lvl,optsnpxy, ... + chnkr,wts,kern,opdims,verb ... + ) + npxy = chnk.flam.nproxy_square(kern,width/2^(lvl-1),optsnpxy); + [pr,ptau,pw,pin] = chnk.flam.proxy_square_pts(npxy); + + if verb; fprintf('%3d | npxy = %i\n', lvl, npxy); end + + % return the FLAM-style pxyfunr corresponding to lvl + pxyfunrlvl = @(rc,rx,cx,slf,nbr,l,ctr) chnk.flam.proxyfunr(rc,rx,slf,nbr,l, ... + ctr,chnkr,wts,kern,opdims,pr,ptau,pw,pin); +end \ No newline at end of file diff --git a/chunkie/FLAM b/chunkie/FLAM index 2c9361e..a83698e 160000 --- a/chunkie/FLAM +++ b/chunkie/FLAM @@ -1 +1 @@ -Subproject commit 2c9361e7bc8fb8c3255f33275c0f2f7b7bbb7572 +Subproject commit a83698efdd58293c0e4c0c52f59498ffb9da9a99 diff --git a/chunkie/chunkerflam.m b/chunkie/chunkerflam.m index e693e97..c591359 100644 --- a/chunkie/chunkerflam.m +++ b/chunkie/chunkerflam.m @@ -46,7 +46,10 @@ % up the FLAM compression. It may be desirable to % turn this off if there appear to be precision % issues. -% +% opts.proxybylevel = boolean (false), determine the number of +% necessary proxy points adaptively at each +% level of the factorization. Typically needed +% only for moderate / high frequency problems. % opts.quad = string ('ggqlog'), specify quadrature routine to % use. % @@ -126,6 +129,7 @@ lvlmax = inf; if isfield(opts,'lvlmax'), lvlmax = opts.lvlmax;end adaptive_correction = false; if isfield(opts,'adaptive_correction'), ... adaptive_correction = opts.adaptive_correction;end +proxybylevel = false; if isfield(opts,'proxybylevel'), proxybylevel = opts.proxybylevel;end eps = rem(rank_or_tol,1); @@ -264,10 +268,11 @@ optsnpxy = []; optsnpxy.rank_or_tol = rank_or_tol; optsnpxy.nsrc = occ; -npxy = chnk.flam.nproxy_square(kern, width, optsnpxy); +% compute number of proxy in largest box +npxy = chnk.flam.nproxy_square(kern,width,optsnpxy); if npxy == -1 - warning('chunkerflam: proxy failed, defaulting to no proxy') + warning('chunkerflam: proxy failed at highest level, defaulting to no proxy') if strcmpi(flamtype,'rskelf') F = rskelf(matfun,xflam,occ,rank_or_tol,[],struct('verb',verb,'lvlmax',lvlmax)); end @@ -277,24 +282,55 @@ return end - -[pr,ptau,pw,pin] = chnk.flam.proxy_square_pts(npxy); - if strcmpi(flamtype,'rskelf') ifaddtrans = true; - pxyfun = @(x,slf,nbr,l,ctr) chnk.flam.proxyfun(slf,nbr,l,ctr,chnkrs, ... - kern,opdims_mat,pr,ptau,pw,pin,ifaddtrans,l2scale); - F = rskelf(matfun,xflam,occ,rank_or_tol,pxyfun,struct('verb',verb,... - 'lvlmax',lvlmax)); + pxyfun = @(lvl) proxyfunbylevel(width,lvl,optsnpxy, ... + chnkrs,kern,opdims_mat,ifaddtrans,l2scale,verb && proxybylevel); + if ~proxybylevel + % if not using proxy-by-level, always use pxyfun from level 1 + pxyfun = pxyfun(1); + end + F = rskelf(matfun,xflam,occ,rank_or_tol,pxyfun, ... + struct('verb',verb,'lvlmax',lvlmax,'proxybylevel',proxybylevel)); end if strcmpi(flamtype,'rskel') - warning('chunkerflam: proxyr has not adapted for the multiple chunker case yet') - pxyfunr = @(rc,rx,cx,slf,nbr,l,ctr) chnk.flam.proxyfunr(rc,rx,slf,nbr,l, ... - ctr,chnkr,kern,opdims,pr,ptau,pw,pin); - F = rskel(matfun,xflam,xflam,occ,rank_or_tol,pxyfunr); + warning('chunkerflam: proxyr has not been adapted for the multiple chunker case yet') + pxyfunr = @(lvl) proxyfunrbylevel(width,lvl,optsnpxy, ... + chnkr,kern,opdims,verb && proxybylevel); + if ~proxybylevel + % if not using proxy-by-level, always use pxyfunr from level 1 + pxyfunr = pxyfunr(1); + end + F = rskel(matfun,xflam,xflam,occ,rank_or_tol,pxyfunr, ... + struct('verb',verb,'lvlmax',lvlmax,'proxybylevel',proxybylevel)); end +end +function pxyfunlvl = proxyfunbylevel(width,lvl,optsnpxy, ... + chnkrs,kern,opdims_mat,ifaddtrans,l2scale,verb ... + ) + npxy = chnk.flam.nproxy_square(kern,width/2^(lvl-1),optsnpxy); + [pr,ptau,pw,pin] = chnk.flam.proxy_square_pts(npxy); + if verb; fprintf('%3d | npxy = %i\n', lvl, npxy); end + + % return the FLAM-style pxyfun corresponding to lvl + pxyfunlvl = @(x,slf,nbr,l,ctr) chnk.flam.proxyfun(slf,nbr,l,ctr,chnkrs, ... + kern,opdims_mat,pr,ptau,pw,pin,ifaddtrans,l2scale); +end + +function pxyfunrlvl = proxyfunrbylevel(width,lvl,optsnpxy, ... + chnkr,kern,opdims,verb ... + ) + npxy = chnk.flam.nproxy_square(kern,width/2^(lvl-1),optsnpxy); + [pr,ptau,pw,pin] = chnk.flam.proxy_square_pts(npxy); + + if verb; fprintf('%3d | npxy = %i\n', lvl, npxy); end + + % return the FLAM-style pxyfunr corresponding to lvl + pxyfunrlvl = @(rc,rx,cx,slf,nbr,l,ctr) chnk.flam.proxyfunr(rc,rx,slf,nbr,l, ... + ctr,chnkr,kern,opdims,pr,ptau,pw,pin); end + diff --git a/chunkie/chunkerkerneval.m b/chunkie/chunkerkerneval.m index 46d6aea..85a0f82 100644 --- a/chunkie/chunkerkerneval.m +++ b/chunkie/chunkerkerneval.m @@ -27,6 +27,12 @@ % opts.flam supercedes opts.accel, if % both are true, then flam will be used. (false) % opts.accel - if = true, use specialized fmm if defined +% for the kernel or use a generic FLAM fmm to accelerate +% the smooth part of the eval. if false do direct. (true) +% opts.proxybylevel - if = true, determine the number of +% necessary proxy points adaptively at each level of the +% factorization in ifmm. Typically needed only for +% moderate / high frequency problems. (false) % for the kernel, if it doesnt exist or if too few % sources/targets, or if false, % do direct. (true) @@ -112,6 +118,7 @@ opts_use.forcefmm = false; opts_use.fac = 1.0; opts_use.eps = 1e-12; +opts_use.proxybylevel = false; cormat = []; if isfield(opts,'forcesmooth'); opts_use.forcesmooth = opts.forcesmooth; end if isfield(opts,'forceadap'); opts_use.forceadap = opts.forceadap; end @@ -129,6 +136,7 @@ if isfield(opts,'accel'); opts_use.accel = opts.accel; end if isfield(opts,'fac'); opts_use.fac = opts.fac; end if isfield(opts,'eps'); opts_use.eps = opts.eps; end +if isfield(opts,'proxybylevel'); opts_use.proxybylevel = opts.proxybylevel; end % Assign appropriate object to targinfo targinfo = []; @@ -466,4 +474,4 @@ end -end +end \ No newline at end of file diff --git a/chunkie/fmm2d b/chunkie/fmm2d index c69815d..1da75f6 160000 --- a/chunkie/fmm2d +++ b/chunkie/fmm2d @@ -1 +1 @@ -Subproject commit c69815d1f1cf4a43a5cd3756f019ce2cbac1c19d +Subproject commit 1da75f6bc979e8eb40a9e8b0b0c438dad8a271ba diff --git a/devtools/test/flamproxybylevelTest.m b/devtools/test/flamproxybylevelTest.m new file mode 100644 index 0000000..ea77f05 --- /dev/null +++ b/devtools/test/flamproxybylevelTest.m @@ -0,0 +1,187 @@ + +%FLAMPROXYBYLEVELTEST +% +% test the FLAM matrix builder with level-dependent proxy points + +clearvars; close all; +iseed = 8675309; +rng(iseed); + +zk = 50; +tol = 1e-14; +proxybylevel = true; + +% addpaths_loc(); + +cparams = []; +cparams.maxchunklen = 4.0/abs(zk); +cparams.nover = 0; +chnkr = chunkerfunc(@(t) starfish(t), cparams, []); +chnkr = sort(chnkr); +s = chnkr.arclengthfun; + +npt = chnkr.npt; +compute_dense = (npt < 5000); + +wts = weights(chnkr); wts = wts(:); + +% sources + +ns = 100; +ts = 0.0+2*pi*rand(ns,1); +rs = (3.0 + 3*rand(ns,1)); +sources = rs' .* [cos(ts)';sin(ts)']; +strengths = randn(ns,1); + +% targets + +nt = 1; +ts = 0.0+2*pi*rand(nt,1); +targets = 0.2*[cos(ts)'; sin(ts)']; +targets = targets.*repmat(rand(1,nt),2,1); + +% plot geo and sources + +figure(1) +clf +hold off +plot(chnkr) +hold on +scatter(sources(1,:),sources(2,:),'o') +scatter(targets(1,:),targets(2,:),'x') +axis equal + +% declare kernels + +kerns = kernel('helm','s', zk); +kernd = -2*kernel('helm','d',zk); + +% eval u on bdry + +srcinfo = []; srcinfo.r = sources; +targinfo = []; targinfo.r = chnkr.r(:,:); +targinfo.d = chnkr.d(:,:); +ubdry = kerns.fmm(tol,srcinfo,targinfo,strengths); + +% eval u at targets + +targinfo = []; targinfo.r = targets; +utarg = kerns.fmm(tol,srcinfo,targinfo,strengths); + +%% + +% build laplace dirichlet matrix * (-2) +if compute_dense + start = tic; + D = chunkermat(chnkr,kernd); + t1 = toc(start); + + fprintf('%5.2e s : time to assemble matrix\n',t1) + + sys = eye(npt) + D; +end + +opts = []; +opts.rank_or_tol = tol; +opts.proxybylevel = proxybylevel; +opts.flamtype = 'rskelf'; +opts.verb = true; + +start = tic; +F = chunkerflam(chnkr,kernd,1.0,opts); +t1 = toc(start); + +fprintf('%5.2e s : time for FLAM rskelf compress\n',t1) + +rhs = ubdry; +start = tic; sig_rskelf = rskelf_sv(F,rhs); t1 = toc(start); + +fprintf('%5.2e s : time for rskelf solve \n',t1) + +%% + +opts = []; +opts.rank_or_tol = tol; +opts.proxybylevel = proxybylevel; +opts.flamtype = 'rskel'; +opts.verb = true; + +start = tic; +F2 = chunkerflam(chnkr,kernd,1.0,opts); +t1 = toc(start); + +fprintf('%5.2e s : time for FLAM rskel compress\n',t1) + +rhs = ubdry; +start = tic; +mtvc_rskel = rskel_mv(F2,rhs); +t1 = toc(start); + +fprintf('%5.2e s : time for rskel apply \n',t1) + +if compute_dense + start = tic; + mtvc_dense = sys * rhs; + t1 = toc(start); + + fprintf('%5.2e s : time for dense apply\n',t1) + + err = norm(mtvc_dense-mtvc_rskel,'fro')/norm(mtvc_dense,'fro'); + + fprintf('difference between rskel and dense applies %5.2e\n\n',err) + + assert(err < 10*tol); + + start = tic; + sig_dense = gmres(sys,rhs,[],1e-14,min(npt, 1000)); + t1 = toc(start); + + fprintf('%5.2e s : time for dense gmres\n',t1) + + err = norm(sig_dense-sig_rskelf,'fro')/norm(sig_dense,'fro'); + + fprintf('difference between rskelf and dense GMRES densities %5.2e\n\n',err) + + assert(err < 1e-12 || err < 10*tol); +end + +%% + +% evaluate at targets using adaptive quadrature + +opts = []; +opts.eps = tol; +opts.forceadap = true; +opts.verb = true; + +start=tic; +sol_adap = chunkerkerneval(chnkr,kernd,sig_rskelf,targets,opts); +t1 = toc(start); +fprintf('%5.2e s : time to eval at targs (slow, adaptive routine)\n',t1) + +relerr = norm(utarg-sol_adap,'fro')/(sqrt(chnkr.nch)*norm(utarg,'fro')); +relerr2 = norm(utarg-sol_adap,'inf')/dot(abs(sig_rskelf(:)),wts(:)); +fprintf('relative frobenius error %5.2e\n',relerr); +fprintf('relative l_inf/l_1 error %5.2e\n',relerr2); + +%% evaluate at targets using FLAM's ifmm + +opts = []; +opts.eps = tol; +opts.accel = true; +opts.verb = true; +opts.proxybylevel = proxybylevel; + +% remove fmm to force FLAM ifmm usage +kernifmm = kernd; +kernifmm.fmm = []; + +start=tic; +sol_ifmm = chunkerkerneval(chnkr,kernifmm,sig_rskelf,targets,opts); +t1 = toc(start); +fprintf('%5.2e s : time to eval at targs (ifmm)\n',t1) + +relerr = norm(utarg-sol_ifmm,'fro')/(sqrt(chnkr.nch)*norm(utarg,'fro')); +relerr2 = norm(utarg-sol_ifmm,'inf')/dot(abs(sig_rskelf(:)),wts(:)); +fprintf('relative frobenius error %5.2e\n',relerr); +fprintf('relative l_inf/l_1 error %5.2e\n',relerr2);