% DMTR-22.tex
\documentclass[DM,toc]{lsstdoc}
% Package imports go here
\usepackage{amsmath} % Advanced maths commands
\usepackage{amssymb}
\usepackage{gensymb} % degree symbol
\usepackage{cprotect}
% Local commands go here
%% Journal abbreviations
%\bibliographystyle{aasjournal}
\title[PDAC report]{Prototype Data Access Center: User Report}
\author{
K.~Suberlak,
\v{Z}.~Ivezi\'c,
and the PDAC team.}
\setDocRef{DMTR-22}
\setDocUpstreamLocation{\url{https://github.com/lsst-dmsst/DMTR-22}}
\date{2017-07-11}
\setDocAbstract{%
A report on user experience of the Prototype Data Access Center (PDAC). We test the quality and ease of access to the data. PDAC will pave the way to the Science User Interface and Tools (SUIT). We employ both in-detail study of individual objects, and a statistical study of an ensemble of objects. We evaluate user-friendliness of the current interface, and make recommendations for its future improvements.
}
% Change history defined here. Will be inserted into
% correct place with \maketitle
% OLDEST FIRST: VERSION, DATE, DESCRIPTION, OWNER NAME
\setDocChangeRecord{%
\addtohist{1.0}{2017-05-28}{Report complete.}{Krzysztof Suberlak}
\addtohist{1.1}{2017-07-11}{Release as DMTR-22}{T.~Jenness}
}
\begin{document}
% Create the title page
% Table of contents will be added automatically if "toc" class option
% is used.
\maketitle
\section{Introduction}
This is a document to report on the user experience testing of the Prototype Data Access Center on macOS Sierra Version 10.12.3 (16D32), using the Safari Version 10.0.3 (12602.4.8).
The Large Synoptic Survey Telescope (LSST) will produce a large volume of data. Such an unprecedented data stream poses new challenges in providing easy access for users, so that they can quickly find what they need and thus focus on the science goal that they would like to achieve. The detailed description of technical requirements and a vision for the Science User Interface and Tools (SUIT) is outlined in documents \citeds{LDM-130} (SUIT requirements) and \citeds{LDM-492} (SUIT Vision)\footnote{These documents were recently superseded by \citeds{LDM-554} (Science Platform Requirements) and \citeds{LSE-319} (Science Platform Vision).}, as well as confluence pages\footnote{\url{https://confluence.lsstcorp.org/display/DM/Science+User+Interface+and+Tools}}. The current Prototype Data Access Center implementation of SUIT is described in the Guide to PDAC.\footnote{\url{https://confluence.lsstcorp.org/display/DM/Guide+to+PDAC+version+1}\label{note1}}
An online tool that can serve as an interface between the user and the data is not a new idea. There are indeed a number of websites that serve such a purpose, such as Aladin\footnote{\url{http://aladin.u-strasbg.fr}}, Sloan Digital Sky Survey (SDSS) SciServer including CasJobs\footnote{\url{http://www.sciserver.org/tools/}}, NASA/IPAC InfraRed Science Archive (IRSA)\footnote{\url{https://irsa.ipac.caltech.edu/frontpage/}}, Mikulski Archive for Space Telescopes (MAST)\footnote{\url{https://archive.stsci.edu}}, and NASA/IPAC Extragalactic Database (NED)\footnote{\url{https://ned.ipac.caltech.edu}}. These allow a user to query for data (either via SQL query, or with the interactive user interface), returning the data table. Some user interfaces (e.g.\ IRSA) have some rudimentary plotting capabilities. SUIT stands at the forefront of user interface development: it would go beyond displaying the time series, spectrum, and other static information, and would also allow the user to analyze the data in the cloud. For instance, a Jupyter notebook could be employed to run machine learning models, and use the output to further select objects based on feature analysis. Currently the National Optical Astronomy Observatory (NOAO) DataLab\footnote{\url{http://datalab.noao.edu}} is an example of a tool that can help explore the data in an interactive way, using allocated computational resources. Similar developments are advocated at the MIT Astroinformatics / Geoinformatics group \citep{Pankratius:7515118,Coster:2014:ION}.
This report details tests performed on the PDAC implementation of SUIT, including screenshots and data-based plots.
% Science Platform and PDAC development and deployment in 2017
% https://confluence.lsstcorp.org/display/DM/Science+Platform+and+PDAC+development+and+deployment+in+2017
% PDAC Meeting notes
% https://confluence.lsstcorp.org/display/DM/PDAC+Meeting+Minutes
\section{Overview of performed tests}
We test a variety of aspects of PDAC: the user interface, infrastructure, and database ingestion, focusing on the Sloan Digital Sky Survey Stripe 82 Forced Photometry LSST reprocessed dataset. In Section~\ref{sec:ui} we describe the functionality available through the user interface. In Section~\ref{sec:infra} we describe the structure of available data: both datasets available directly from the NCSA (internal catalogs), and data that is available from IRSA (external catalogs). In that section we also provide an overview of query and analysis methods available directly through the User Interface, as well as through SQL. Finally, in Section~\ref{sec:dg} we consider the quality of database ingestion, answering the question of how well a given dataset was loaded into PDAC. In particular we compare the S82 dataset, an outcome of the Summer 2013 reprocessing \citedsp{Document-15097}, to the same data stored locally at the University of Washington.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% SECTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{User Interface: what we see}
\label{sec:ui}
\subsection{UI overview}
%%%%%%%%%%%%%%%
% INTRODUCTION
%%%%%%%%%%%%%%%
In order to access PDAC we follow the directions\textsuperscript{~\ref{note1}} that include logging in to NCSA via VPN \url{https://vpn.ncsa.illinois.edu/} using Cisco AnyConnect Secure Mobile Client, and opening \url{http://lsst-sui-proxy01.ncsa.illinois.edu/suit} in the web browser. This opens the main interface screen, which allows the user to select the database and perform the desired query.
Currently, PDAC v1, in the upper-left corner of the interface, under tab 'LSST Data' (see Fig.~\ref{fig:PDAC_interface}) includes the Summer 2013 DM-stack reprocessed SDSS Stripe 82 data (database \verb|sdss_stripe82_00|), hosted at the NCSA on the LSST prototype (``integration cluster'') hardware, in Qserv [Gregory Dubois-Felsmann, priv.\ comm.\ 02-20-2017, slack]. The only other locally stored database (as of March 2017) is the WISE catalog, which is not yet accessible via the graphical user interface (it can be queried as Data Base \verb|wise_00|, with catalogs 'Object' containing objects (like DeepSource in S82 above), and 'ForcedSource' containing forced photometry (like DeepForcedSource in S82)).
The upper-left corner of the interface also leads to 'External Images' and 'External Catalogs'. The Catalogs are all NASA/IPAC\footnote{Infrared Processing and Analysis Center, \url{http://www.ipac.caltech.edu/project/lsst}} Infrared Science Archive (IRSA) publicly accessible catalogs, including Gaia, WISE, 2MASS, SPITZER, etc. (see Fig.~\ref{fig:PDAC_external_cat}).
%%%%%%%%%
% TESTS
%%%%%%%%%
\paragraph{Range of input accepted by `Name or Position' box}
\begin{description}
\item[Description:] We test the range of RA, dec values and the types of names accepted by the 'Name or Position' box (see Fig.~\ref{fig:PDAC_interface}).
\item[Input:] As input we use a set of coordinates and names: first in the $-180\degree < \mathrm{RA} < 180\degree$ convention, (ra,dec) = ($-7.530128$\degree, $-1.171239$\degree), then the same but in the $0\degree < \mathrm{RA} < 360\degree$ convention: (ra,dec) = (352.469872\degree, $-1.171239$\degree). Finally we use an objectId = 216471849679198456, present in the DeepSource table when querying this location with a 2\arcsec radius. We perform a cone query of the DeepSource table with a 2\arcsec search radius.
\item[Results:] Negative RA is not resolved (Fig.~\ref{fig:PDAC_test1}). Unless we use a name from NED / Simbad, the objectId is not resolved, even though it is present in the queried table.
\item[Date:] 2/15/2017
\end{description}
\paragraph{Are all search options available in `Method Search'?}
\begin{description}
\item[Description:] The 'Method Search' dialog box contains 'Cone', 'Elliptical', 'Box', 'Polygon', 'Multi-Object', 'All Sky' options. We test whether each method works with simple input.
\item[Input:] We employ coordinates (ra,dec)=(352.469872 , -1.171239) as search region center. We use 'Cone' radius 2 arcsec, 'Elliptical' semi-major axis of 2 arcsec, 'Box' side of 2 arcsec, 'Polygon' default vertices (352.48041 -1.18156, 352.45985 -1.18156, 352.45984 -1.16073, 352.48040 -1.16073), 'Multi-Object': a list of two ra, dec submitted as a text file in the format ra, dec: ( 352.469872 , -1.171239 | 342.469872 , -1.101239). 'All Sky': we add an SQL constraint id = 216471849679198456. For all search methods we query DeepSource catalog.
\item[Results:] 'Cone', 'Elliptical', 'Box', and 'Polygon' search methods return a list of objects in coadds for a given search region. 'Multi-Object' provided with the radec.txt file returns an error 'Fail to load table. Error: edu.caltech.ipac.firefly.server.query.DataAccessException: DataAccessException:ERROR:Could not do Multi Object search, internal configuration wrong.: table should be a post search not a get from:unknown'. It would be more informative if a notice about the unavailability of this search method were shown on the main page, rather than returning an error after the user has prepared and uploaded a radec.txt file. The 'All Sky' method returns correct output (a list of selected fields where DeepSource.id = 216471849679198456).
\item[Date:] 3/28/2017
\end{description}
%%%%%%%%%%%
% FIGURES
%%%%%%%%%%%
\begin{figure*}
\includegraphics[width=\textwidth]{figs/1_PDAC_interface}
\caption{The main user interface of PDAC ver. 1. As of April 2017, Multi-Object and All Sky queries are not available. The 'Name or Position' only resolves positive RA ($0\degree < \mathrm{RA} < 360\degree$), while using direct SQL query resolves both positive and negative RA ($-180\degree < \mathrm{RA} < 180\degree$). Currently this is an inconsistency that we recommend to be addressed in the future. Furthermore, the names resolved have to be consistent with those present in NED or Simbad databases - any id's from the database queried (e.g.\ 'id' in RunDeepSource, or 'objectId' in RunDeepForcedSource) are not yet resolved.}
\label{fig:PDAC_interface}
\end{figure*}
\begin{figure}
\begin{centering}
\includegraphics[width=0.5\columnwidth]{figs/2_PDAC_externals}
\caption{IPAC-hosted catalogs, accessible via IRSA.}
\label{fig:PDAC_external_cat}
\end{centering}
\end{figure}
\begin{figure*}
\includegraphics[width = \textwidth]{figs/1_PDAC_interface_a}
\caption{Testing range of input accepted by the 'Name or Position' box. The negative RA is not resolved, even though direct SQL query accepts both positive and negative values.}
\label{fig:PDAC_test1}
\end{figure*}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% SECTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Infrastructure: what is available and how to get it}
\label{sec:infra}
\subsection{Overview}
As we described in Section~\ref{sec:ui}, the main user interface allows access to the internally stored (at NCSA) SDSS Stripe 82 data reprocessed during the Summer 2013\footnote{\url{https://confluence.lsstcorp.org/display/DM/Properties+of+the+2013+SDSS+Stripe+82+reprocessing}} as part of the Data Challenge with the continuously developed LSST Stack\footnote{\url{https://pipelines.lsst.io/index.html}}.
The reprocessing included:
\begin{itemize}
\item coadding the data from all epochs in each of the ugriz SDSS filters. Measurements on coadds (per object) are available as the \verb|RunDeepSource| table, accessible via Catalogs $\rightarrow$ 'DeepSource'. The single-band coadded images with MariaDB metadata are available as the \verb|DeepCoadd| table, accessible via Images $\rightarrow$ 'DeepCoadd'.
\item using i-band detections to seed forced photometry on all epochs in all bands. The results of photometry are available as the \verb|RunDeepForcedSource| table, accessible via Catalogs $\rightarrow$ 'Deep Forced Source'.
\item For reference, the individual calibrated single epoch images are available as the \verb|Science_Ccd_Exposure| table, accessible via Images $\rightarrow$ 'Science CCD Exposure'.
\end{itemize}
Additional details of the schema are also outlined in the LSST Data Challenge Report \citedsp{LDM-226,DMTN-035}, and the LSST Database Schema \citedsp{LDM-153} Browser~\footnote{\url{http://ls.st/8g4}}.
Spatial queries that can be directly executed from the PDAC interface, called 'Method Search', include cone, box, elliptical and polygon (See Fig.~\ref{fig:PDAC_interface}). Spatial queries allow to choose a certain region of the sky by the object ra,dec coordinates.
Cone, elliptical, and box queries return objects in a region of the sky bound by a geometrical shape centered on given coordinates (ra,dec). Cone is the most useful type of query, allowing to find objects within a certain radius from the coordinate query. Elliptical search allows to define the shape by an ellipse with a given semi-major axis, position angle and the axis ratio. A box is a square centered on the query coordinates, with a given side size. A polygon allows to define the search region by between 3 and 15 coordinate pairs (vertices of the polygon). Note: Multi-object query is listed in the drop-down menu, but has not yet been implemented (March 2017) - in the future it will allow the user to upload a list of ra,dec and search radii, finding 1-to-1 matches in the existing catalog. An All-Sky option (no spatial constraints) has not been tested given the size of the database.
Any query returns a list of all objects within the given region (Fig.~\ref{fig:PDAC_cone_DS}).
\subsection{Tests Performed}
\subsubsection{Identification numbers in DeepSource and DeepForcedSource}
%%%%%%%%%%%%%%%
% INTRODUCTION
%%%%%%%%%%%%%%%
A certain limitation of the main UI is inability to resolve id's from the database itself (see Fig.~\ref{fig:PDAC_interface}). Indeed, the only way to find which objects have been detected in a certain small region in DeepSource coadds, and download light curves only for one of them from DeepForcedSource forced photometry catalog, is to use an SQL constraint. For example, we performed cone query against DeepSource table for ra,dec = 0.283437\degree, 1.178522\degree, 2\arcsec search radius (this is the RR Lyrae ID=13350 also investigated in Sec.~\ref{sec:periodogram}). Limiting the results to [\verb|id , coord_ra , coord_decl, flux_psf , coadd_id , coadd_filter_id|], we find that there is a coadd for each filter (denoted with \verb|coadd_filter_id|). The identification in i-band coadd (\verb|coadd_filter_id=3|) is id=3588818166880604. Note that while DeepSource has a separate id for a coadd in each band, only id's for i-band coadd are inherited by DeepForcedSource catalog. The DeepSource.id == DeepForcedSource.objectId, because DeepForcedSource.id stands for forced photometry detection id, which is unique for each epoch. Therefore a single object has one DeepSource.id, equal to DeepForcedSource.objectId, but multiple DeepForcedSource.id - we recommend to highlight this in the metadata for it is a potential area for confusion. The only way to currently recover a light curve for a single object from DeepForcedSource is to first select the detection id in DeepSource, and use that as a constraint when using cone query on DeepForcedSource (see Fig.~\ref{fig:PDAC_cone_DFS})
%%%%%%%%%
% TESTS
%%%%%%%%%
\paragraph{Ease of selecting a light curve for a single object}
\begin{description}
\item[Description:] we test how difficult it is to select forced photometry light curve for only one object.
\item[Input:] Cone query DeepForcedSource catalog for ra,dec = 0.283437\degree, 1.178522\degree, with 2\arcsec search radius. Select forced photometry data products for a single object within the search radius.
\item[Result:] There is no option to show which objectId's are present in a given DeepForcedSource query, and somehow select only one. The simplest workaround is to query DeepSource against given coordinates, adding SQL constraint \verb|'coadd_filter_id = 3'| to return only i-band id's (because id's corresponding to coadds in other filters were not a seed of forced photometry, and only for i-band there is a correspondence DeepSource.id = DeepForcedSource.objectId). The i-band DeepSource.id is 3588818166880604. We then query DeepForcedSource against the same coordinates, adding SQL constraint \verb|'objectId = 3588818166880604'|. This is quite confusing (id means something different for DeepSource and DeepForcedSource), and may cause some problems to users wishing to download forced photometry for only one object within a given space region.
\item[Date:] 4/3/2017
\end{description}
%%%%%%%%%%%
% FIGURES
%%%%%%%%%%%
\begin{figure*}
\includegraphics[width=\textwidth]{figs/4_PDAC_cone_DeepForcedSource}
\caption{The correct way to select a light curve for a particular object from the forced photometry DeepForcedSource catalog. Here we first queried against the DeepSource catalog to find id's for objects detected in coadds in a small region within 2\arcsec from ra,dec = 0.283437\degree, 1.178522\degree. For i-band there is only one id: 3588818166880604. Since DeepSource.id = DeepForcedSource.objectId, we require objectId to be equal to 3588818166880604. Thus we are able to access forced photometry for precisely one object. Otherwise, obtaining a light curve from a direct spatial query of DeepForcedSource would provide all photometry for all objects detected in coadds within the search radius, which may not be the desired behavior for analysis of Time Series. We recommend that the result of spatial query against RunDeepForcedSource should contain a summary of which unique objectId's are present, with an ability to select only one object (with multi-band photometry), if more than one is present in the search region. Otherwise it becomes a long-winded process to first find what id's were detected in coadds (DeepSource), to then select id for i-band coadd, and select only rows corresponding to that objectId in RunDeepForcedSource.}
\label{fig:PDAC_cone_DFS}
\end{figure*}
\subsubsection{Postage Stamp Miniatures}
\label{sec:miniatures}
%%%%%%%%%%%%%%%
% INTRODUCTION
%%%%%%%%%%%%%%%
We compared the postage stamp miniatures showing the overview of the region against which a given query was performed. We find that the miniature image does not always come from the catalog we query against. In fact, the ``coverage'' image comes from IRAS, DSS, 2MASS, or WISE---the survey is chosen depending on the size of the region needed to be shown [Xiuqin Wu, priv.\ comm., 2017]. Indeed, as the query region is increased, the shown image changes unexpectedly from DSS to IRAS or WISE, without notifying the user. A recommendation is to display information about the origin of the miniature images.
%%%%%%%%%
% TESTS
%%%%%%%%%
\paragraph{Comparing the miniature images to SDSS DR13 Sky Server for a point source}
\begin{description}
\item[Description:] we test how well does a coverage image reflect the queried region.
\item[Input:] perform cone query against the Deep Source table, using coordinates of ra,dec = 23h30m57.31s, +1d1m13.8s ( or 352.73878\degree , 1.02049\degree ), with search radii of 2\arcsec, 10\arcsec, 100\arcsec, or 1000\arcsec. Each time, compare the coverage to the SDSS DR13 Sky Server \url{http://skyserver.sdss.org/dr13/en/tools/chart/navi.aspx}.
\item[Result:] we expected that the coverage image would be centered on a star, and indeed it was at 2\arcsec, 10\arcsec, 100\arcsec search radii. However, at 1000\arcsec radius (and larger), the image miniature drastically switches to using a different imaging survey without informing the user. It is much more blurry than before, and it is impossible to recognize features that should be present at that scale as compared to the SDSS DR13 Sky Server.
\item[Date:] 3/7/2017
\end{description}
\paragraph{Comparing the miniature images to SDSS DR13 Sky Server for an extended source}
\begin{description}
\item[Description:] we test how well does a coverage image reflect the queried region for an extended source (a galaxy)
\item[Input:] perform cone query against the Deep Source table, using coordinates of a galaxy at ra,dec = 40.433\degree, 0.449\degree, with search radii of 2\arcsec, 10\arcsec, 100\arcsec, or 1000\arcsec. Each time, compare the coverage to the SDSS DR13 Sky Server \url{http://skyserver.sdss.org/dr13/en/tools/chart/navi.aspx}.
\item[Result:] we expected that the coverage image would be centered on the galaxy, and indeed it was at 2\arcsec, 10\arcsec, 100\arcsec search radii. However, at 1000\arcsec radius (and larger), the image miniature drastically switches to using a different imaging survey without informing the user. It is much more blurry than before, and it is impossible to recognize features that should be present at that scale as compared to the SDSS DR13 Sky Server.
\item[Date:] 3/7/2017
\end{description}
%%%%%%%%%%%
% FIGURES
%%%%%%%%%%%
\begin{figure*}
\includegraphics[width=\textwidth]{figs/3_PDAC_cone_DeepSource}
\caption{Example cone query against Deep Source table, returning all objects detected within a radius of 10\arcsec from the position ra = 352.469872\degree, dec = $-1.171239$\degree. Note that the background image (postage stamp miniature) does not show the actual S82 coadds. This particular feature is described further in Sec.~\ref{sec:miniatures}.}
\label{fig:PDAC_cone_DS}
\end{figure*}
\subsubsection{Database linkage: obtaining magnitudes}
\label{sec:linkage_magnitudes}
%%%%%%%%%%%%%%%
% INTRODUCTION
%%%%%%%%%%%%%%%
A user querying a database would be very likely interested in obtaining calibrated magnitudes. We test the ease of obtaining coadd magnitudes for sources in a given location, and for obtaining light curves for a particular object.
%%%%%%%%%
% TESTS
%%%%%%%%%
\paragraph{Obtaining the calibrated magnitudes for coadd images (Deep Source catalog)}
\begin{description}
\item[Description:] we test how easy it is to access with the user interface calibrated magnitudes for coadds for sources within a certain region.
\item[Input:] perform cone query against the Deep Source table, using coordinates of ra,dec = 23h30m57.31s, +1d1m13.8s ( or 352.73878\degree , 1.02049\degree ), with a search radius of 10\arcsec. Seek to select magnitudes from the available fields.
\item[Result:] negative. We expected to find a field 'g magnitude', or similar, to find magnitudes in a given filter. However, such field is not present. Currently, one can only obtain magnitudes via a direct SQL query\footnote{\url{https://confluence.lsstcorp.org/display/DM/PDAC+sample+queries+and+test+cases}\label{note2}}
\item[Date:] 4/27/2017
\end{description}
\paragraph{Obtaining the calibrated magnitudes for forced photometry images (Deep Forced Source catalog)}
\begin{description}
\item[Description:] we test how easy it is to access with the user interface calibrated magnitudes for forced photometry light curves for sources within a certain region.
\item[Input:] perform cone query against the Deep Forced Source table, using coordinates of ra,dec = 23h30m57.31s, +1d1m13.8s ( or 352.73878\degree , 1.02049\degree ), with a search radius of 10\arcsec. Seek to select magnitudes from the available fields.
\item[Result:] negative. We expected to find a field 'g magnitude', or similar, to find magnitudes in a given filter. However, such field is not present. As in the case of coadd images, we can only obtain magnitudes via a direct SQL query\textsuperscript{~\ref{note2}}
\item[Date:] 4/27/2017
\end{description}
\subsubsection{External Images}
\label{sec:ext_images}
%%%%%%%%%%%%%%%
% INTRODUCTION
%%%%%%%%%%%%%%%
Apart from the Stripe 82 reprocessed data, the user can also access image data from 2MASS, WISE, SDSS, MSX, DSS, and IRAS via the External Images tab (see Fig.~\ref{fig:ext_images}).
%%%%%%%%%
% TESTS
%%%%%%%%%
\paragraph{Are the miniatures from External Images properly centered?}
\begin{description}
\item[Description:] we test how well the object is rendered when querying for an image from External Images catalogs
\item[Input:] perform a NED resolved query for the M3 globular cluster in External Images. Select 'Create New Plot'. Query SDSS u-band.
\item[Result:] we expected that the image would be centered on M3. In fact, it isn't - M3 appears off the center (see Fig.~\ref{fig:ext_images_m3}).
\item[Date:] 4/25/2017
\end{description}
\paragraph{Are the miniatures from External Images properly rotated?}
\begin{description}
\item[Description:] we test how well the large-scale extended object is rendered when querying for an image from various External Images catalogs
\item[Input:] perform a NED resolved query for M81 in External Images. Select 'Create New Plot'. Query WISE, 2MASS, SDSS and DSS surveys.
\item[Result:] we expected that all miniatures would be centered on M81. The miniatures are only approximately centered, and the SDSS image is rotated with respect to WISE or 2MASS. A different angular scale is seemingly shown in tiled view. (see Fig.~\ref{fig:ext_images_m81}). Update: selecting WCS option (see Fig.~\ref{fig:ext_images_m81_wcs}) shows that images are approximately properly rotated, but are still not oriented in the same fashion.
\item[Date:] 4/23/2017
\end{description}
\paragraph{Does the option of creating new color image from selected RGB frames work properly?}
\begin{description}
\item[Description:] we test the ease of use of External Images - 'Create New Plot - 3 Colors' user interface.
\item[Input:] in External Images: Create New Plot - 3 Colors, query for M81, and select SDSS R,G,U bands to stand for red, green, blue colors.
\item[Result:] the frames are properly added and the ability to select frames from other surveys (such as infrared) is really useful. However, the SDSS image still appears off-center (see Figs.~\ref{fig:ext_images_rgb} and~\ref{fig:ext_images_rgb_query}).
\item[Date:] 4/24/2017
\end{description}
%%%%%%%%%%%
% FIGURES
%%%%%%%%%%%
\begin{figure*}
\includegraphics[width=\textwidth]{figs/external_images_menu.png}
\caption{External Images User Interface. The radio buttons on the top allow to create a new image, add another panel to an image created by the previous query, or create a three-color image. In this example, described in Sec.~\ref{sec:ext_images}, we query for an image of M81 , with the expected size of the image of 0.139 degrees.}
\label{fig:ext_images}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/external_images_m81_query.png}
\caption{External Images User Interface: a succession of queries against WISE, 2MASS, SDSS and DSS catalogs for an image miniature of M81. The SDSS miniatures are rotated with respect to the other catalogs.}
\label{fig:ext_images_m81}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/external_images_m81_query_wcs.png}
\caption{External Images User Interface: a follow-up to Fig.~\ref{fig:ext_images_m81}, showing tiles of WISE, 2MASS, SDSS u, DSS images. Selecting the 'WCS match' option, and clicking buttons to add grid layer to the image, and show the directions of Equatorial J2000 North and East, we see that the images are not all oriented in the same fashion. Using the ruler button we show the same distance on each tile to give a sense of scale.}
\label{fig:ext_images_m81_wcs}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/external_images_rgb.png}
\caption{External Images: 'Create New Plot - 3 Colors' main menu.}
\label{fig:ext_images_rgb}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/external_images_rgb_query.png}
\caption{External Images: the result of query for M81 in SDSS R,G,U filter data shown as RGB colors.}
\label{fig:ext_images_rgb_query}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/external_images_m3.png}
\caption{External Images: querying for an SDSS image of M3 Globular Cluster. It appears off the image center.}
\label{fig:ext_images_m3}
\end{figure*}
\subsubsection{External Catalogs}
\label{sec:ext_catalogs}
%%%%%%%%%%%%%%%
% INTRODUCTION
%%%%%%%%%%%%%%%
The existing interface allows access to data from External Catalogs, that is not physically present at the NCSA, but parsed through IRSA. We test the ease of obtaining Gaia and WISE data for simple NED-resolved locations.
%%%%%%%%%
% TESTS
%%%%%%%%%
\paragraph{Can we obtain data from Gaia for a region specified by the cone query around a NED-resolved object?}
\begin{description}
\item[Description:] we test whether the data obtained from Gaia matches the location specified by the user. We choose M81 as our target for the ease of comparison to the miniature image shown in the results.
\item[Input:] perform a NED resolved query for M81 galaxy in External Catalog, with 100 arcsec search radius.
\item[Result:] positive. We see detections around M81, and the miniature at this scale properly displays the queried region (see Fig.~\ref{fig:ext_catalogs_m81}).
\item[Date:] 5/3/2017
\end{description}
\paragraph{Can we obtain WISE data from External Catalogs for a crowded field?}
\begin{description}
\item[Description:] we test how the WISE data can be accessed via the UI.
\item[Input:] perform a NED resolved cone query for M33 in External Catalogs, selecting AllWISE Source Catalog, and 100 arcsec search radius.
\item[Result:] positive. The miniature image is approximately centered on M33, and it is in the infrared (as we can see from comparing Fig.~\ref{fig:ext_catalogs_m33} to 2MASS image from SDSS SkyServer on Fig.~\ref{fig:skyserver_m33}).
\item[Date:] 5/3/2017
\end{description}
%%%%%%%%%%%
% FIGURES
%%%%%%%%%%%
\begin{figure*}
\includegraphics[width=\textwidth]{figs/external_catalogs_menu.png}
\caption{External Catalogs: main menu. The user can select one of many projects (see Fig.~\ref{fig:PDAC_external_cat}): Gaia, WISE, 2MASS, SPITZER, IRAS, Planck, MSX, COSMOS, BOLOCAM, AKARI, USNO, DENIS, HERSCHEL, PTF, and others. Some projects have more than one associated catalog (e.g.\ WISE includes AllWISE, NeoWISE, etc.).}
\label{fig:ext_catalogs}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/external_catalogs_m81.png}
\caption{External Catalogs: the result of querying for Gaia detections around the location of M81.}
\label{fig:ext_catalogs_m81}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/external_catalogs_m33.png}
\caption{External Catalogs: the result of querying AllWISE Source Catalog in the cone query around location of M33 - the Triangulum Galaxy. Note that the miniature image may be in infrared, which is why the spiral features are not easily discernible (compare to Fig.~\ref{fig:skyserver_m33} - the 2MASS cutout from SDSS Sky Server).}
\label{fig:ext_catalogs_m33}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/external_catalogs_m33_SkyServer.png}
\caption{SDSS DR13 Sky Server: image showing the 2MASS image of the region around the position of M33 galaxy: ra,dec = 23.46204, 30.66022. Compare that to the query against the location of M33 for AllWISE detections shown on Fig.~\ref{fig:ext_catalogs_m33}.}
\label{fig:skyserver_m33}
\end{figure*}
\subsection{Time Series: periodogram}
\label{sec:periodogram}
%%%%%%%%%%%%%%%
% INTRODUCTION
%%%%%%%%%%%%%%%
Ability to view the time series of any object is very useful for exploring its variability. The result of a query into Forced Deep Source table allows a simple 2D plot of any two data columns, and that could include time vs flux. However, the advantage of the Time Series View in the PDAC User Interface is that it also performs the cross-match against the Science CCD Exposure table to find the calibrated magnitudes, and allows calculating Lomb-Scargle periodogram to find the best fitting period.
Here we test the overall navigation experience in the Time Series User Interface. This includes finding the best periodicity with the Lomb-Scargle periodogram, folding the light curve on an accepted period, and changing the band in which the light curve is folded. We test the Time Series View on known RR Lyrae stars \citep{2010ApJ...708..717S}.
%%%%%%%%%
% TESTS
%%%%%%%%%
\paragraph{Can we access the Time Series View easily?}
\begin{description}
\item[Description:] we test the access to the Time Series View: is the button always responsive, do we get the same result if querying twice?
\item[Input:] cone query the Deep Source catalog for a known RR Lyrae (ID=13350 in the catalog of \citet{2010ApJ...708..717S}), at ra,dec = 0.283437\degree, 1.178522\degree, with 2 arcsec search radius, to find objectIds at this location in i-band (since only i-band coadd was the seed for forced photometry in all bands). To select only i-band coadd we need to add an SQL constraint \verb|coadd_filter_id = 3|. We find that the only objectId in i-band is 'id=3588818166880604'. We cone query Run Deep Forced Source table at the same position and search radius, adding an SQL constraint 'objectId = 3588818166880604' (since RunDeepSource.id == RunDeepForcedSource.objectId). We then select 'View Time Series' button (see Fig.~\ref{fig:time_series_cone_result}).
\item[Result:] positive. In most cases the Time Series View appears, with the three bottom windows missing any plots: a message 'Plot Failed = Could not create plot' appears (see Fig.~\ref{fig:time_series_viewer_initial}). It is not clear what the three windows were meant to display. It is easy to select a light curve in a different band for the same image with the radio buttons in the upper left corner.
\item[Date:] 4/7/2017
\end{description}
\paragraph{Can we recover the period of an RR Lyrae star using the Lomb-Scargle periodogram with default settings?}
\begin{description}
\item[Description:] the tool becomes useful if it can provide meaningful results. We test whether it is possible to find the true period of a known RR Lyrae star using the default settings.
\item[Input:] as in the 'can we access the Time Series View easily?' test, we cone query Deep Forced Source table for ra,dec = 0.283437\degree, 1.178522\degree, with 2 arcsec search radius, adding SQL constraint 'objectId = 3588818166880604'. Click 'View Time Series', select SDSS band 'g' (the default is u), click 'Find Period'. Select Periodogram Type 'Lomb-Scargle', Period Step Method 'Fixed Frequency'. Leave Number of Peaks at 50. Click 'Periodogram Calculation' (Fig.~\ref{fig:period_finder_initial}).
\item[Result:] negative. The default grid spans too many possible periods, and does not recover the true period of 0.546 days (see Fig.~\ref{fig:period_finder_default}). We recommend implementing Periodograms for Multiband Astronomical Time Series (as in \cite{2015ApJ...812...18V}) to utilize the presence of near-simultaneous multi-band observations. Another way to improve is to allow several different preset grid spacing settings, e.g.\ AstroML-like, super-Nyquist, etc.
\item[Date:] 4/7/2017
\end{description}
\paragraph{Can we recover the period of an RR Lyrae star using the Lomb-Scargle periodogram with custom settings?}
\begin{description}
\item[Description:] we test whether it is possible to find the true period of a known RR Lyrae star using the custom settings. Given our knowledge of the range of true RR Lyrae periods, we constrain the maximum and minimum periods (frequency) for Lomb-Scargle Periodogram calculation.
\item[Input:] as in the 'can we access the Time Series View easily?' test, we cone query Deep Forced Source table for ra,dec = 0.283437\degree, 1.178522\degree, with 2 arcsec search radius, adding SQL constraint 'objectId = 3588818166880604'. Click 'View Time Series', select SDSS band 'g' (the default is u), click 'Find Period'. Select Periodogram Type 'Lomb-Scargle', Period Step Method 'Fixed Frequency'. Leave Number of Peaks at 50, enter Period Min = 0.229, and Period Max = 0.998 [days]. Click 'Periodogram Calculation'. Try also 0.29-0.9 range.
\item[Result:] positive. One of the major periodogram peaks corresponds to the true period of 0.546 days (see Fig.~\ref{fig:period_finder_custom}). By judiciously constraining the minimum and maximum periods searched (based on our knowledge of periods for this type of RR Lyrae stars) we can recover the correct period. However, the method is very sensitive to chosen frequency grid - small change in bounds leads to dramatic improvement / degradation in usefulness of periodogram (see Fig.~\ref{fig:period_finder_best_bounds}). We also recommend that the method used ('Fixed Frequency' or 'Fixed Period') should be displayed in the Viewer.
\item[Date:] 4/7/2017
\end{description}
\paragraph{Is there a documentation describing the algorithms used to calculate the periodogram?}
\begin{description}
\item[Description:] we look for documentation so that the user can better understand what type of Lomb-Scargle algorithm is used, since it would affect the outcome of the calculation.
\item[Input:] search the Time Series view, or the results of the periodogram calculation, for any documentation.
\item[Result:] negative. We find no reference to which specific algorithms were used. From the PDAC team we found that the backend of the periodogram tool is a clone of the NASA Exoplanet Archive [Xiuqin Wu 2017, priv.comm.]. The PDAC implementation has been internally tested to comply with that original toolset, and the original documentation of NASA Exoplanet Archive is available at \url{http://exoplanetarchive.ipac.caltech.edu/docs/pgram/pgram_parameters.html}. We recommend that there be a link to this page providing the user with this crucial information.
\item[Date:] 5/4/17
\end{description}
\paragraph{Does the period selection bar correctly update limits and values?}
\begin{description}
\item[Description:] we test in selecting minimum and maximum periods in the Periodogram calculation whether the x-ticks of the period selection slider bar update correctly, and whether it is easy to change the values manually.
\item[Input:] as in "can we recover the period of an RR Lyrae star using the Lomb-Scargle periodogram with custom settings?". Check whether the x-ticks are appropriately updated (selected minimum on the far left of the slider bar, and selected maximum on the far right). Enter a different minimum value: 0.4 instead of 0.229, and check the behavior.
\item[Result:] negative. As in Fig.~\ref{fig:period_finder_default}, the minimum and maximum of the slider bar are always 0.001 and 365 days. Furthermore, as in Fig.~\ref{fig:period_finder_custom}, we see that the period slider bar x-ticks do not correctly update to the custom-chosen period range.
\item[Date:] 5/4/17
\end{description}
\paragraph{Is it easy to fold the light curve on an accepted period in a different band?}
\begin{description}
\item[Description:] we test whether it is possible to fold the light curve on an accepted period found with one band (e.g.\ u) but using the data from a different band (e.g.\ g). This is useful because there should be approximately the same period detected throughout bands for many sources (e.g.\ RR Lyrae).
\item[Input:] follow the steps as in "can we recover the period of an RR Lyrae star using the Lomb-Scargle periodogram with custom settings?". Choose the period closest to 0.5467 days. Click 'Accept period' in the lower right corner. In the new 'Viewer' window, click on a radio button corresponding to a different band (eg. r). Observe if the light curve in r-band is folded on the accepted period.
\item[Result:] negative. Once the period is accepted, we are brought to a 'Viewer' window, which shows the light curve folded in the original band on an accepted period. Clicking on r-band, instead of folding the light curve in r-band on that period (as seems intuitive), shows the raw photometry, with no way of folding it on any period (see Fig.~\ref{fig:period_viewer_click_r}). Furthermore, the actual value of the accepted period is not displayed anywhere in the 'Viewer' window. We recommend:
a) once the user clicks 'accept period', clicking in the 'Viewer' window on radio buttons of u,g,r,i or z band would simply fold the light curve using the data from the selected band on the chosen period.
b) the accepted period ought to be displayed somewhere in the 'Viewer' window, e.g.\ on top of the plot, saying 'Accepted period = .... [days]'.
c) the windows on the bottom do not show anything informative - it would be useful if the user was informed what they are supposed to show.
d) the parameters used for periodogram calculation (minimum, maximum period) should be remembered within a single object Time Series View.
\item[Date:] 5/3/17
\end{description}
\paragraph{Is the periodogram tool very sensitive to boundary values?}
\begin{description}
\item[Description:] we test whether the current implementation of the periodogram tool is sensitive to the boundary values chosen for period.
\item[Input:] as in "can we recover the period of an RR Lyrae star using the Lomb-Scargle periodogram with custom settings?". Cone query the Deep Forced Source table for ra,dec = 0.283437\degree, 1.178522\degree, with 2 arcsec search radius, adding SQL constraint 'objectId = 3588818166880604'. Click 'View Time Series', select SDSS band 'g' (the default is u), click 'Find Period'. Select Periodogram Type 'Lomb-Scargle', leave Number of Peaks at 50, set Period Step Method to 'Fixed Frequency'. Enter Period Min = 0.22, and Period Max = 0.9 [days]. Click 'Periodogram Calculation'. Try also 0.3-0.9, and 0.29-0.9 range. Try changing to 'Fixed Period', using the same period bounds (0.22-0.9, 0.3-0.9, 0.29-0.9).
\item[Result:] positive. The periodogram tool is very sensitive to changes in boundaries: compare Figs.~\ref{fig:period_finder_custom} and~\ref{fig:period_finder_best_bounds}: 0.229-0.998 vs 0.29-0.9. We find that with the same bounds, the period step methods also yield different results: compare pairs of 0.22-0.9 and 0.3-0.9 (Figs.~\ref{fig:period_finder_0.22-0.9} and~\ref{fig:period_finder_0.3-0.9}; left panel is 'Fixed Frequency', right is 'Fixed Period'). Surprisingly, even if the range itself is smaller, we may be less sensitive to the true period: compare 0.29-0.9 to 0.22-0.9 (Figs.~\ref{fig:period_finder_best_bounds} and~\ref{fig:period_finder_0.22-0.9}) - the latter has a more dense grid, and yet the performance is strikingly different. We recommend warning the user that at best the periodogram tool may help confirm the period, and that the default periodogram number of peaks may be insufficient.
\item[Date:] 5/5/17
\end{description}
%%%%%%%%%%%
% FIGURES
%%%%%%%%%%%
\begin{figure*}
\includegraphics[width=\textwidth]{figs/PDAC_cone_result}
\caption{A result of cone query against Run Deep Forced Source table (containing S82 S13 data), with (ra,dec,radius = 0.283437, 1.178522, 0.00055 degrees). We circle the 'View Time Series' button that links to the Time Series UI shown on Fig.~\ref{fig:time_series_viewer_initial}.}
\label{fig:time_series_cone_result}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/PDAC_viewer_initial}
\caption{Time Series Viewer for an objectId=3588818166880604, at ra, dec = 0.283437\degree, 1.178522\degree. Note that initially on the bottom there are three empty panels. The radio buttons in the upper left corner allow intuitive selection of the SDSS filter for the periodogram calculation. We choose g-band, and click 'Find Period', marked with the red oval, to calculate Lomb-Scargle periodogram for that band (this takes the user to Fig.~\ref{fig:period_finder_initial}).}
\label{fig:time_series_viewer_initial}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Period_finder_initial}
\caption{Calculating Lomb-Scargle periodogram for an objectId=3588818166880604, at ra, dec = 0.283437\degree, 1.178522\degree. The slider in the upper left corner allows the user to fold the light curve on a chosen period. Clicking 'Calculate Periodogram' button opens the dialog window 'Periodogram'. Currently it contains only the Lomb-Scargle Periodogram Type. Period Step Method options include Fixed Frequency or Fixed Period, similar to the NASA Exoplanet Periodogram Tool (Fig.~\ref{fig:exoplanet_archive}). If we don't choose anything for maximum and minimum periods, the calculation will proceed with defaults, which for this RR Lyrae fails to detect the true period (Fig.~\ref{fig:period_finder_default}). If we choose the minimum and maximum periods knowing what period to expect for a given class of object, we are more likely to detect the true period (Fig.~\ref{fig:period_finder_custom}), although the method itself is very sensitive to frequency grid. Clicking on 'Periodogram Calculation' proceeds with evaluating Lomb-Scargle periodogram with chosen Period Step Method using the default frequency grid (see Fig.~\ref{fig:period_finder_default}).}
\label{fig:period_finder_initial}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Period_finder_defaults}
\caption{Calculating Lomb-Scargle periodogram for an objectId=3588818166880604, at ra, dec = 0.283437\degree, 1.178522\degree. Using the default settings does not recover the true underlying period of 0.547987 days. See Fig.~\ref{fig:period_finder_custom} for a more appropriate choice of period range.}
\label{fig:period_finder_default}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Period_finder_fix_freq_0229-0998}
\caption{Calculating Lomb-Scargle periodogram for an objectId=3588818166880604 at ra, dec = 0.283437\degree, 1.178522\degree. When we appropriately constrain the frequency grid on which the powers of periodogram are evaluated, we recover the period close to the true period of 0.547987 days. On this figure we choose fixed frequency method, with $P_{min} = 0.229 $ and $P_{max} = 0.998$ days, which are 90\% of the smallest and 110\% of the largest RR Lyrae periods in the \citet{2010ApJ...708..717S} sample. Note that as of April 2017, the minimum and maximum values of the slider that allows the user to interactively fold the light curve on any period do not update to the values used in the Periodogram search. However, a slight change of frequency range can drastically improve the calculation: see Fig.~\ref{fig:period_finder_best_bounds}.}
\label{fig:period_finder_custom}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Period_finder_fix_freq_029-09_best}
\caption{Calculating Lomb-Scargle periodogram for an objectId=3588818166880604 at ra, dec = 0.283437\degree, 1.178522\degree. Slight change of frequency grid can heavily affect the outcome: here we use fixed frequency, with period chosen between 0.29 and 0.9 days.
Clicking 'Accept Period' takes the user to Fig.~\ref{fig:period_viewer_accept_period}.}
\label{fig:period_finder_best_bounds}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Period_viewer_fix_freq_029-09_best}
\caption{The result of accepting the period found by limiting the periodogram bounds by $P_{min} = 0.29 $ and $P_{max} = 0.9$ days, for an RR Lyrae ID=13350 (objectId = 3588818166880604, at ra, dec = 0.283437\degree, 1.178522\degree). It is not clear what the miniatures show: different epochs? The light curve is correctly folded on g-band data. A surprising behavior here is that selecting a different band (e.g.\ 'r'), instead of folding the light curve in that band on the accepted period, displays the raw data (Fig.~\ref{fig:period_viewer_click_r}).}
\label{fig:period_viewer_accept_period}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Period_viewer_fix_freq_029-09_best_click_r}
\caption{The result of clicking on the radio button for the r-band data. Instead of showing the r-band light curve folded on the accepted period, we see the raw r-band data. Clicking the 'Find Period' button does not 'remember' the result of the previous search on the same data.}
\label{fig:period_viewer_click_r}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Period_finder_022-09_freq_period}
\caption{Calculating Lomb-Scargle periodogram for an objectId=3588818166880604 at ra, dec = 0.283437\degree, 1.178522\degree. Bounding period between 0.22-0.9 days, the left panel used 'Fixed Frequency', and the right panel 'Fixed Period'.}
\label{fig:period_finder_0.22-0.9}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Period_finder_03-09_freq_period}
\caption{Calculating Lomb-Scargle periodogram for an objectId=3588818166880604 at ra, dec = 0.283437\degree, 1.178522\degree. Bounding period between 0.3-0.9 days, the left panel used 'Fixed Frequency', and the right panel 'Fixed Period'.}
\label{fig:period_finder_0.3-0.9}
\end{figure*}
Finally, we compare the PDAC Time Series User Interface to that of the NASA Exoplanet Archive Periodogram\footnote{\url{http://exoplanetarchive.ipac.caltech.edu/cgi-bin/Pgram/nph-pgram}} (see Fig.~\ref{fig:exoplanet_archive}). Using a few RR Lyrae PDAC g-band light curves, each calculation is allocated a time slot of approximately 15 seconds. Also see Table~\ref{tab:periods} for a summary of results.
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Exoplanet_archive_periodogram}
\caption{The same object as Fig.~\ref{fig:RRLyr_13350_Sesar} and Fig.~\ref{fig:RRLyr_13350_PDAC}, using the SDSS data from \citet{2010ApJ...708..717S}. The highest significance frequency peak (power 21.58) corresponds to a period of 0.35365194 days. Only the second in significance peak (power 20.62) corresponds to the 'true' period of 0.547969 days \citep{2010ApJ...708..717S}. Note the bottom-left corner: the calculation took 15 secs for one light curve (compare to a few milliseconds of AstroML code naive single-sinusoid approach that gave the same result for this particular object).}
\label{fig:exoplanet_archive}
\end{figure*}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% SECTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Database Ingestion: is what we get what we expected to get?}
\label{sec:dg}
\subsection{Single forced photometry light curve}
We compare a forced photometry light curve for a single object from the S82 dataset between the PDAC copy and the locally stored version (at the University of Washington - UW). We used an objectId = 216172782516437336, in patch \verb|g00_22|, with 72 forced photometry epochs in g-band. From available detection data (analogous to Deep Source table at PDAC) we find ra, dec = 359.974019436\degree, -1.25626927667\degree, extendedness = 1.
%%%%%%%%%
% TESTS
%%%%%%%%%
\paragraph{Can we query Deep Source table by objectId?}
\begin{description}
\item[Description:] we have both the ra, dec coordinates and objectId of a source. We test if it is possible to query solely by objectId, without having to specify ra, dec in cone query.
\item[Input:] cone query the Deep Source catalog for an objectId = 216172782516437336 at ra, dec = 359.974019436\degree, -1.25626927667\degree , search radius 3 arcsec. Add an SQL constraint \verb|coadd_filter_id = 3|, \verb|objectId = 216172782516437336|. Try to query without specifying ra,dec.
\item[Result:] negative. It is necessary to specify ra,dec, even if the objectId is known. However, we find the sought object. We recommend that querying by objectId be implemented in the next release of the UI.
\item[Date:] 5/22/2017
\end{description}
\paragraph{Is the number of photometric points between UW and PDAC copy of S82 dataset identical?}
\begin{description}
\item[Description:] we compare the local copy of the g-band photometry for objectId = 216172782516437336 vs PDAC hosted version.
\item[Input:] cone query the Deep Forced Source catalog for an objectId = 216172782516437336 at ra, dec = 359.974019436, -1.25626927667 , search radius 3 arcsec. Add an SQL constraint \verb|id = 216172782516437336| and \verb|exposure_filter_id = 1|.
\item[Result:] positive. This query yields 73 epochs as compared to UW 72 epochs, but upon examination 1 epoch of the PDAC result (exposureid 6518110430, \verb|exposure_time_mid| = 54040.248144284444) has \verb|flux_psf| and \verb|flux_psf_err| = None. Recommendation: forced flux measurements where flux is not available should not be returned to the user without a warning, or appropriate information (e.g.\ 'for this object there are also N epochs where the reported \verb|flux_psf| is None, available here'). After removing the non-detection, the number of epochs for forced photometry measurements in PDAC-hosted light curve is identical to the local copy.
\item[Date:] 5/24/2017
\end{description}
\paragraph{Are the time stamps for each forced photometry epoch the same?}
\begin{description}
\item[Description:] we compare the light curve resulting from test above, focusing on the reported time per exposure: are the times identical between PDAC and UW?
\item[Input:] light curve from PDAC and light curve from UW for objectId = 216172782516437336.
\item[Result:] negative. The UW light curve has a mean offset of 26.95 seconds vs PDAC light curve for the same object: approximately half of the mean exposure time. UW data records the beginning of the exposure, whereas PDAC: the middle of the exposure. Although metadata for UW does not allow to guess that information (calling time column 'mjd'), the PDAC column name makes this distinction more clear by calling time \verb|'exposure_time_mid'|. Recommendation: given that it causes a small discrepancy in coincidence measurements, it would be prudent to perhaps provide both the beginning of the exposure and the middle of the exposure, unless providing the time of the beginning is a very nonstandard practice. Note that the mjd data stored at PDAC has more significant digits (12-13 in PDAC data compared to 6-7 at the UW data).
\item[Date:] 5/25/2017
\end{description}
\paragraph{Are the units for flux measurements between PDAC and UW the same?}
\begin{description}
\item[Description:] we test whether the flux units returned by PDAC query of Run Deep Forced Source table allows for direct comparison with the UW data, and if not, whether it is possible to make them comparable using only the UI.
\item[Input:] query against the objectId = 216172782516437336, using raw flux.
\item[Result:] negative. Data at UW is already calibrated for zero-point, thus is stored in ergs/sec/cm2/Hz, whereas the PDAC data is stored in ADUs. It is currently impossible to obtain calibrated fluxes using the PDAC UI: an example of an SQL query to obtain calibrated fluxes, requiring a join between RunDeepForcedSource table, for the information about the individual observations, and the \verb|Science_Ccd_Exposure| table (for a per-visit-image photometric zero point), is on the PDAC Sample Queries. Recommendation: allow the user to query RunDeepForcedSource and offer options to return uncalibrated flux measurements, or a calibrated flux making a join query with \verb|Science_Ccd_Exposure| in the background.
\item[Date:] 5/27/2017
\end{description}
\paragraph{Are the calibrated fluxes from PDAC identical as those at the UW?}
\begin{description}
\item[Description:] we compare the calibrated fluxes obtained by SQL query against RunDeepForcedSource and \verb|Science_Ccd_Exposure| table in g-magnitude for the objectId = 216172782516437336, and the UW hosted forced photometry data.
\item[Input:] calibrated flux light curve for objectId = 216172782516437336, using SQL \verb|scisql_dnToFlux| and \verb|scisql_dnToFluxSigma| functions to convert raw to calibrated fluxes (see Sec.~\ref{sec:sql_single_flux} for the SQL query).
\item[Result:] positive. If we remove the missing photometry datapoint from PDAC, then there is an identical number of epochs. The mean residual difference between PDAC and UW calibrated flux data is zero, and all the difference can be ascribed to floating point arithmetic (different accuracy of data stored at UW and PDAC). See Fig.~\ref{fig:single_flux_difference} for the histogram of difference between UW and PDAC fluxes. Mean flux ratio is 1.0000003263832988. On Fig.~\ref{fig:single_calib_flux} we plot the PDAC and UW data as time series, with the residuals representing flux difference between the two sources of data.
\item[Date:] 5/27/2017
\end{description}
%%%%%%%%%%%
% FIGURES
%%%%%%%%%%%
\begin{figure*}
\begin{centering}
\includegraphics[width=0.7\textwidth]{figs/Single_UW_vs_PDAC_flux}
\cprotect\caption{Histogram of flux difference between data stored at PDAC vs UW for objectId = 216172782516437336. The difference is less than 1E-35, i.e. on the level of the floating point error. This proves that once fluxes are correctly calibrated (using the zero point magnitudes from \verb|Science_Ccd_Exposure| table), light curves at PDAC represent approximately identical information as those stored at UW.}
\label{fig:single_flux_difference}
\end{centering}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Single_raw_LC_comparison}
\caption{Calibrated flux for objectId= 216172782516437336, same as Fig.~\ref{fig:single_flux_difference}. Light curves at PDAC and UW are identical once brought to a common unit system. The bottom panel shows the difference between PDAC and UW light curve - the values are multiplied by 1E5 to show the difference (in units of 1E-29 ergs/sec/cm2/Hz, so that the order of flux difference is 1E-34).}
\label{fig:single_calib_flux}
\end{figure*}
Forced photometry measurements may report flux values that are below zero, or are very noisy (S/N $<$ 2). For instance, for objectId=216172782516437336, 24 epochs have S/N $<$ 2. We developed a pipeline\footnote{\url{https://github.com/suberlak/Faint_pipeline_report}} that imposes a uniform prior on each flux measurement in accordance with the non-physicality of negative flux values. Thus we provide a re-estimate of the faint flux (S/N $<$ 2) measurement, and a magnitude for each forced-photometry epoch. We contrast that with the PDAC \verb|scisql_dnToAbMag| function which simply removes the points with negative flux from the light curve. The impact of this pipeline on the UW data is shown in Fig.~\ref{fig:single_faint_flux_UW}.
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Single_UW_raw_LC_faint_flux}
\caption{Light curve from UW for objectId=216172782516437336, highlighting the faint points, with S/N $<$ 2. The green points show the effect of applying a non-negative Bayesian prior on forced photometry.}
\label{fig:single_faint_flux_UW}
\end{figure*}
Given that as we have shown, the flux measurements contained in light curves at PDAC and UW are identical, we show the impact of correction of faint flux measurement on PDAC data on Fig.~\ref{fig:single_faint_flux_PDAC}.
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Single_PDAC_raw_LC_faint_flux}
\caption{Same as Fig.~\ref{fig:single_faint_flux_UW}, but using PDAC data.}
\label{fig:single_faint_flux_PDAC}
\end{figure*}
Given the impact of correction on flux, magnitudes would also be affected. We show the impact of corrected-flux magnitudes vs the scisql function magnitudes. There is no change where S/N $>$ 2, but for faint points, the corrected fluxes yield smaller magnitudes. On Fig.~\ref{fig:single_faint_detail_mag} we juxtapose the PDAC SQL-computed magnitudes (using SQL query in Sec.~\ref{sec:sql_single_mag}), vs. magnitudes based on PDAC SQL-computed fluxes, corrected using custom pipeline wherever S/N $<$ 2.
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Single_PDAC_g_mag_compare_detail}
\cprotect\caption{A section of light curve for objectId=216172782516437336, comparing the magnitudes for PDAC data calculated with the built-in \verb|scisql_dnToAbMag| and \verb|scisql_dnToAbMagSigma| functions (orange), and the magnitudes based on PDAC calibrated fluxes, treated with the faint fluxes pipeline (blue). Note that in cases where PDAC flux was below zero, the built-in scisql pipeline ignores these points in magnitude calculation, since magnitude is undefined for negative flux. Using our Bayesian pipeline, we put a better estimate on the upper limit of the flux measurement.}
\label{fig:single_faint_detail_mag}
\end{figure*}
In conclusion, based on the case of objectId = 216172782516437336, light curves from UW and PDAC are identical to the precision afforded by the floating point error. However, accessing the PDAC calibrated magnitudes is nontrivial: we recommend that the functionality of executing a joint query between DeepForcedSource and \verb|Science_CCD_Exposure| to return calibrated fluxes and magnitudes be accessible via the UI. In addition, assumptions that come into computing magnitudes from fluxes should all be displayed and available to the user (e.g.\ where the zero point magnitudes are stored to calculate calibrated fluxes, how only non-negative flux values are used to calculate magnitudes, what formulae are used in that conversion). This would greatly improve the educational value of SUIT. Furthermore, user experience would significantly improve if it were possible to query either DeepSource or DeepForcedSource directly by the objectId, without the need to specify ra, dec.
\subsection{Positional comparison: box query}
We compare the source density between the PDAC and UW S82 datasets. This is a simple test that can help confirm whether the two datasets are identical, as we would expect. Any irregularities and information on how they differ would highlight a particular aspect of the PDAC S82 dataset that should be brought to the attention of the user. Steps required to make this comparison also highlight direct ways to improve the user experience. The UW data is a deep source coadd catalog, limited at i < 23.5, containing all detected sources in i-band. The PDAC Run Deep Source catalog contains coadds in each of the five bands, and is not limited in magnitude depth.
%%%%%%%%%
% TESTS
%%%%%%%%%
\paragraph{Can we apply the magnitude cutoff in a box query within the UI?}
\begin{description}
\item[Description:] limiting the maximum magnitude of sources returned in a query is an important functionality - given the understanding about the limiting magnitude of the survey, the user may wish to select good-quality measurements by simply selecting the cutoff magnitude below the limiting magnitude of the survey. Although coadds greatly increase the survey depth, there are many faint sources for which data quality may be insufficient for scientific purpose. The magnitude cutoff also speeds up the query, since the number of sources grows exponentially with magnitude.
\item[Input:] query Run Deep Source using box query with ra, dec limited by (325, -0.7) and (325.1, -0.6). Use SQL constraint \verb|coadd_filter_id = 3 |. Seek to limit the i-band magnitude to 23.5.
\item[Result:] negative. We recommend that in the future version of the PDAC, obtaining calibrated fluxes and magnitudes for objects in both Deep Source and Deep Forced Source tables be available via the UI. An additional feature could be allowing the user to limit the magnitude of returned objects. This is currently only possible with an SQL query (see Sec.~\ref{sec:sql_box}), using \verb|HAVING i<23.5| clause, where \verb|i| magnitude is the outcome of join between DeepForcedSource and \verb|Science_CCD_Exposure|.
\item[Date:] 5/30/2017
\end{description}
\paragraph{Is the source density the same between UW and PDAC data?}
\begin{description}
\item[Description:] we compare the source density in the S82 dataset using PDAC Run Deep Source catalog and the locally hosted (UW) Deep Source catalog. The local catalog is cut at i < 23.5 mag. The two should agree on the number of sources in the same area of the sky.
\item[Input:] Use box query against the PDAC S82 data. The query box in the SQL \verb|qserv_areaspec_box| function requires minimum and maximum ra,dec bounds. Use 325 $\degree$, 325+D$\degree$, and -0.7$\degree$, -0.7+D$\degree$, with D=[0.1, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4 ]. This specifies box queries with increasing area (see Fig.~\ref{fig:box_area_illustrate}). Measure the query speed including the download of parent id and object id, i-band coadd magnitude and error. Limit the query depth to i< 23.5 to mirror the UW data. Select only i-band coadd by specifying \verb|RunDeepSource.coadd_filter_id = 3 |. Measure the time it takes to query the Qserv and download the data. Count the number of unique id in the query. Compare to the number of objects in the same area from the UW S82 Deep Source catalog.
\item[Result:] negative. Number of sources in PDAC per chosen area, constrained in the same way as the UW catalog, is $\approx15\% $ higher (see Tab.~\ref{tab:source_count} for detailed results). This is the result of choosing whether to include deblender parents. In all cases, we find that the 'id' of extra sources at the same location as sources matched to UW catalog are the same as 'parentDeepSourceId' in the UW catalog. We recommend that this issue be explained to the user in more detail, in particular the meaning of \verb|parent|, or \verb|deblend_n_child| column names. Otherwise searches for objects within a certain area may yield unexpected results. As predicted, the measured query time increases with the size of the queried area (Fig.~\ref{fig:box_query_time}), allowing within less than a minute to investigate the area of a few square degrees. See Sec.~\ref{sec:sql_box} for an example of an SQL query used.
\item[Date:] 5/29/2017
\end{description}
\begin{table}
\centering
\caption{Comparison of the number of sources in the same area between PDAC and UW hosted versions of the Deep Source Coadd catalogs. The number of unique id's is considered to be a proxy for the number of sources. If we exclude objects that are parents of the UW sources at the same location, we come closer to resolving the discrepancy (we remove rows where 'id' is the same as a 'parent' of another object). However, there are still objects which may or may not have any match in the UW catalog. Those could be 'parent' sources of 'child' sources that are too faint to be included in the query (i<23.5 limit).}
\label{tab:source_count}
\begin{tabular}{ l|rrr}
\hline
Area [sq.deg] & N(PDAC) & N(UW) & N(PDAC) no parents \\
\hline
0.01 & 731 & 562 & 583 \\
0.04 & 3020 & 2273 & 2350 \\
0.16 & 12320 & 9170 & 9533 \\
0.36 & 25854 & 19154 & 19909 \\
0.64 & 44880 & 33074 & 34371 \\
1.00 & 69374 & 51077 & 52989 \\
1.44 & 98871 & 72876 & 75506 \\
1.96 & 133916 & 99007 & 102630 \\
\end{tabular}
\end{table}
% Explanation
%%%%%%%%%%%
% FIGURES
%%%%%%%%%%%
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Box_area_comparison}
\caption{The area of comparison between the PDAC and UW S82 Deep Coadd catalogs. In the background the S82 source counts from the UW catalog. Each black square illustrates the query area.}
\label{fig:box_area_illustrate}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Box_query_deepsource_i_lt_235}
\caption{Result of a box query against PDAC S82 dataset with increasing box size. We plot query area in square degrees and the number of sources in PDAC contained in that area against query time. There is a scatter in query time corresponding to fluctuating network speed, thus we fit the polynomial to the median query time to highlight the increasing trend. This also emphasizes the importance of allowing the user to limit the query magnitude: otherwise the number of sources resulting from a query may be prohibitively large.}
\label{fig:box_query_time}
\end{figure*}
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Box_compare_sources}
\cprotect\caption{Comparing sources in a very small area (1.44 square arcminutes) between PDAC and UW hosted Deep Source catalogs, bounded by a Polygon with vertices (325.0 -0.7, 325.02 -0.7, 325.02 -0.68, 325.0 -0.68). Blue dots represent sources with id shared between the two catalogs. Orange dots represent sources with id only present in the PDAC sources catalog. These are child sources with a parent at the same location. For example, consider sources id = 217571363454454701 and 217571363454457757, both located within 1 arcsec from ra,dec = 325.008953599611, -0.6820032359336851 . Both are brighter than i = 23.5 mag, but 217571363454454701 is the parent, and also has 4 other deblender children ( \verb|deblend_nchild = 5| for 217571363454454701), that are fainter than i = 23.5 mag.}
\label{fig:box_deblended_sources}
\end{figure*}
% select deep source id's within a certain region
\subsection{Light curve comparison: multiple cone queries}
\label{sec:rr_lyrae}
We assess whether the Lomb-Scargle periodogram can be safely used to determine periods for the data similar in cadence and sparseness to the S82 S13. This may inform the choice of preset settings for the Period Search tool in the Time Series View in PDAC UI.
We use as the 'true' period the best-fit results from detailed template fitting \citep{2010ApJ...708..717S}. Both fit parameters and SDSS DR2 light curves used in \citep{2010ApJ...708..717S} are publicly accessible in the online version of the journal.
%\footnote{\url{http://iopscience.iop.org/article/10.1088/0004-637X/708/1/717/meta#apj326724t2}}.
We obtain the S82 S13 calibrated g-magnitude light curves from PDAC. This requires simultaneous cone query of Deep Forced Source catalog at the given position, and cross-matching against Science CCD Exposure, currently only available via SQL query. As shown on Fig.~\ref{fig:PDAC_query_ra_dec}, only 343 of 483 RR Lyrae are at locations within the PDAC S82 S13 dataset. For each light curve, using the \textit{astroML} python module \citep{VanderPlas:6382200}, we calculate the Lomb-Scargle periodogram powers on a uniform frequency grid of 5000 frequencies spanning between $\omega_{min} = 0.9 ( 2 \pi / P_{max})$, $\omega_{max} = 1.1 ( 2 \pi / P_{min})$, where $P_{min}$ and $P_{max}$ correspond to $90 \%$ of the smallest and $110\%$ of the largest periods in the catalog. Thus we are constraining the periodicities searched exactly where we expect them to be. For each periodogram we assess the significance of the peak by performing 500 bootstrap resamplings\footnote{\url{http://www.astroml.org/book\_figures/chapter10/index.html}}. We use the generalized Lomb-Scargle as the calculation mode, following the defaults in \textit{astroML} (see also Eq.20 in \citep{2009A&A...496..577Z}, and Section 10.3.2 in \citep{2014sdmm.book.....I}).
Lomb-Scargle periodogram does not always find the 'true' period - it is subject to non-uniform sampling, aliasing, and necessity of choosing well the frequency sample on which periodogram powers are evaluated (see~\cite{2017arXiv170309824V} for a recent overview). We nevertheless perform a few simple sanity checks:
\begin{enumerate}
\item Does the PDAC light curve folded on the 'true' period look physical?
\item Using naive Lomb-Scargle, can we find this period using the SDSS DR2 data originally used by \citep{2010ApJ...708..717S}?
\item Using naive Lomb-Scargle, can we find this period using the S82 S13 PDAC data?
\end{enumerate}
We show that even with such a densely sampled Lomb-Scargle periodogram (100 times denser than the default 50 powers calculated in the PDAC Time Series Viewer), and choosing a very appropriate frequency range, we are able to recover the true period only for about a half of the tested stars (see Fig.~\ref{fig:RRLyr_period_ratios}).
We summarize the results of Lomb-Scargle calculation on SDSS DR2 data into the following groups: where with the LS we find the same period (Fig.~\ref{fig:RRLyr_13350_Sesar}), a smaller period (Fig.~\ref{fig:RRLyr_4099_Sesar}), or a bigger period (Fig.~\ref{fig:RRLyr_470994_Sesar}) than the ground truth. This proves that with SDSS DR2 data it would be hard to use solely LS for period finding.
Using the PDAC S82 S13 data we also calculate LS periodogram for the same objects - see Figs.~\ref{fig:RRLyr_13350_PDAC}, \ref{fig:RRLyr_4099_PDAC} and~\ref{fig:RRLyr_470994_PDAC}.
%%%%%%%%%%
% FIGURES
%%%%%%%%%%
% need figures:
% 1) LC from PDAC 4-panel plot (folding on the LS period on PDAC and folding on the true period )
% 2) LC from DR2 (Sesar+2010) (folding on the LS period on DR2, and folding on the true period )
% Use the same star for 1) and 2) . 1) will have likely more points, whereas 2) less, since 2) was taken much earlier ( DR2 vs DR9 )
% We want to show that the true period is 'real', and that it can be recovered with LS, although there may be cases where it fails.
% RR Lyrae periods
\begin{figure*}
\begin{centering}
\includegraphics[width=0.7\columnwidth]{figs/Fig_1_Sesar_2010_RRLyr}
\caption{ Distribution of RR Lyrae periods for 483 objects in \citep{2010ApJ...708..717S}. Note the bimodal distribution, reflecting two main RR Lyrae types: 379 RRab (right) and 104 RRc (left) (see also Fig.16 in \citep{2010ApJ...708..717S}).}
\label{fig:RRLyr_distribution}
\end{centering}
\end{figure*}
% how well LS works using DR2 data
\begin{figure*}
\begin{centering}
\includegraphics[width=0.7\columnwidth]{figs/Fig_3_RRLyr_omega_ratio}
\caption{The distribution of the ratio of $\omega_{true}$ to $\omega_{fit}$. $\omega_{true} = 2 \pi / P_{true}$, is the frequency corresponding to the 'true' period as found by Sesar+2010 (see Table 2 in \citep{2010ApJ...708..717S}). $\omega_{fit} = 2 \pi / P_{fit}$ corresponds to the highest peak in the Lomb-Scargle periodogram evaluated on Sesar+2010 data - SDSS DR2. $\omega_{true} / \omega_{fit}$ is approximately equal to 1, when the naive LS approach is able to recover the 'true' period. When this ratio is smaller or greater than 1, it means that the period recovered from the LS method is respectively shorter or longer than the 'true' period. This may be caused by the inherent simplicity of the simple single-term Fourier Series fitting. Indeed, some RR Lyrae light curves may have shapes that are insufficiently described by a single sinusoid (as on Fig.10.18 in \citep{2014sdmm.book.....I}). See Figs.~\ref{fig:RRLyr_13350_Sesar}, ~\ref{fig:RRLyr_4099_Sesar} and~\ref{fig:RRLyr_470994_Sesar} for details of evaluating LS on SDSS DR2 light curves.}
\label{fig:RRLyr_period_ratios}
\end{centering}
\end{figure*}
% which RR Lyr we could test
\begin{figure*}
\includegraphics[width=\columnwidth]{figs/missing_and_recovered_RR_Lyr_PDAC}
\caption{Results of positional query against 483 RR Lyrae stars from ~\citep{2010ApJ...708..717S}, using their $\mathrm{RA}, \mathrm{Dec}$. Blue dots are 343 stars that have a match in the PDAC S82 dataset within 2 arcsec, and red crosses are 140 stars that did not. Increasing the search radius to 3 arcsec does not alter this result.}
\label{fig:PDAC_query_ra_dec}
\end{figure*}
% SDSS DR2 vs PDAC DR9 data ...
\begin{figure*}
\includegraphics[width=\columnwidth]{figs/Sesar_vs_PDAC_lc_4099}
\caption{Comparison of RR Lyr ID=4099 from ~\citep{2010ApJ...708..717S} (red crosses), and PDAC (blue crosses). The two light curves have different length: 59 vs 162 points, respectively.}
\label{fig:PDAC_SDSS_compare_LC}
\end{figure*}
% DR2 data: success of LS
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Fig_2_RRLyr_LS_periodogram_ID_13350_g}
\caption{ An example of the \textit{astroML} Lomb Scargle periodogram performance, calculated for RR Lyr ID=13350 in SDSS g band (following Table 2 in \citep{2010ApJ...708..717S}), using the SDSS data from \citep{2010ApJ...708..717S}. It took 18.6 milliseconds on a laptop to calculate this periodogram. The upper left panel depicts the raw data. The upper right panel shows the phased light curve constructed with the 'true' period of 0.547987 days ('$P_{true}$'). The lower left panel shows the Lomb Scargle periodogram, where the orange and magenta vertical lines mark the location of the highest periodogram peak, and the frequency based on the reported period ($\omega_{true} = 2 \pi / P_{true}$). The lower right panel shows the phased light curve constructed with the Lomb-Scargle Periodogram period of 0.547161 days, corresponding to the highest peak, $P_{fit} = 2 \pi / \omega_{fit}$. The horizontal red and green lines mark the $5\%$ and $1\%$ significance levels for the highest peak, as found from 500 bootstrap resamplings (see \url{http://www.astroml.org/book\_figures/chapter10/index.html}). The same object, but using the PDAC S82 S13 data is shown on Fig.~\ref{fig:RRLyr_13350_PDAC}.}
\label{fig:RRLyr_13350_Sesar}
\end{figure*}
% DR2 data: failure of LS
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Fig_2_RRLyr_LS_periodogram_ID_4099_g}
\caption{An example of a failure of naive single Lomb Scargle periodogram performance - the ratio of $\omega_{true} / \omega_{fit} = 0.437$. In these four panels we use SDSS DR2 data for RR Lyr ID=4099 from \citep{2010ApJ...708..717S}. Upper-left panel: raw data. Upper-right panel: raw data folded on the true period. Bottom-left panel: Lomb-Scargle periodogram with significance levels. Bottom-right panel: the raw data folded on the LS period corresponding to the highest peak. The 'true' period from \citep{2010ApJ...708..717S} is 0.641754 days, whereas the naive Lomb-Scargle periodogram approach yields the 'fit' period of 0.280827 days. Here $\omega_{fit}$ and $\omega_{true}$ significantly differ for this RR Lyr, and the 'true' frequency appears as only one of many insignificant periodogram peaks. We show the PDAC S82 S13 data used for this object on Fig.~\ref{fig:RRLyr_4099_PDAC}. Everything else as on Fig.~\ref{fig:RRLyr_13350_Sesar}.}
\label{fig:RRLyr_4099_Sesar}
\end{figure*}
% DR2 data: failure of LS
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Fig_2_RRLyr_LS_periodogram_ID_470994_g}
\caption{Same as Fig.~\ref{fig:RRLyr_4099_Sesar}, using the SDSS DR2 data from \citep{2010ApJ...708..717S}. This RR Lyr ID=470994, has a cited period of 0.346794 days ('$P_{true}$'), whereas the period derived from the Lomb-Scargle periodogram is 0.531667 days ('$P_{fit}$'). Thus $\omega_{true} / \omega_{fit} = 1.53$. It may be a good example of aliasing.}
\label{fig:RRLyr_470994_Sesar}
\end{figure*}
% PDAC data: success of LS
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Fig_2_RRLyr_LS_periodogram_ID_13350_gPDAC}
\caption{ The same object as Fig.~\ref{fig:RRLyr_13350_Sesar}, but using data downloaded using PDAC. Using PDAC data, the RR Lyr ID=13350 has a best-fit period of 0.547969 days, almost identical to true period of 0.547987 from \citep{2010ApJ...708..717S}. Panels the same as on Fig.~\ref{fig:RRLyr_4099_Sesar}}
\label{fig:RRLyr_13350_PDAC}
\end{figure*}
% PDAC data: success of LS
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Fig_2_RRLyr_LS_periodogram_ID_4099_gPDAC}
\caption{ The same object as Fig.~\ref{fig:RRLyr_4099_Sesar}, but using data downloaded using PDAC. Calculating a naive LS periodogram using PDAC data for RR Lyr ID=4099 we find the best-fit period (frequency with highest power) of 0.280827 days, almost identical to the period found using LS periodogram on the SDSS \citep{2010ApJ...708..717S} data of 0.280827 days. Both are discrepant with respect to the 'true' period of 0.641754 days from \citep{2010ApJ...708..717S}. Panels the same as on Fig.~\ref{fig:RRLyr_13350_Sesar}}
\label{fig:RRLyr_4099_PDAC}
\end{figure*}
% PDAC data: failure of LS
\begin{figure*}
\includegraphics[width=\textwidth]{figs/Fig_2_RRLyr_LS_periodogram_ID_470994_gPDAC}
\caption{ The same object as Fig.~\ref{fig:RRLyr_470994_Sesar}, but using data downloaded from PDAC. Calculating a naive LS periodogram using PDAC data for RR Lyr ID=470994 we find the best-fit period (frequency with highest power) of 0.531667 days, about 1.5 times as long as the 'true' period of 0.346794 days from \citep{2010ApJ...708..717S}. For this star we get an identical period if we use LS periodogram on SDSS data from \citep{2010ApJ...708..717S} as opposed to PDAC. Panels the same as on Fig.~\ref{fig:RRLyr_13350_Sesar}.}
\label{fig:RRLyr_470994_PDAC}
\end{figure*}
% PDAC vs SDSS DR2 data: quality assessment ...
\begin{figure*}
\includegraphics[width=\columnwidth]{figs/Fig_PDAC_sesar_compare}
\caption{Comparison of the original~\citep{2010ApJ...708..717S} light curves (green) against data for the same objects pulled from PDAC (blue). For each of the 383 light curves in SDSS $g$-band, without any pre-processing or clipping, we calculated the median, weighted mean, mean, and $\chi^{2}_{\mathrm{DOF}}$.}
\label{fig:PDAC_SDSS_comp}
\end{figure*}
\begin{table}
\centering
\caption{Comparison of RR Lyrae periods obtained with different methods. First column - 'true', is the 'ground truth' - period resulting from detailed template fitting by~\citep{2010ApJ...708..717S}. Second column - DR2 LS, is the period corresponding to the most prominent frequency in the Lomb-Scargle periodogram computed on the SDSS DR2 light curve from~\citep{2010ApJ...708..717S}. Third column - DR2 EXO, shows the period found for SDSS DR2 data of~\citep{2010ApJ...708..717S} with the NASA Exoplanet Archive Periodogram service. Fourth column - PDAC LS, is the period found using Lomb-Scargle periodogram on PDAC S82 S13 data.}
\label{tab:periods}
\begin{tabular}{ l|rrrr}
\hline
ID & true & DR2 LS & DR2 EXO & PDAC LS \\
\hline
4099 & 0.641754 & 0.280827 & 0.64175 & 0.280827 \\
13350 & 0.547987 & 0.547161 & 0.35365 & 0.547969 \\
470994 & 0.346794 & 0.531667 & 0.34679 & 0.531667 \\
\end{tabular}
\end{table}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% SECTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Conclusions}
The Prototype Data Access Center readily implements many requirements for SUIT, as outlined in the \citeds{LDM-554} document. We find that the overall structure of the interface is very intuitive, building on the strengths of NASA/IPAC IRSA and SDSS CasJobs. Multiple ways of defining the query region, and resolving names of objects with NED helps quickly access the data, and adding SQL constraints makes it possible to reduce the amount of displayed information. An innovative Time Series View, relying on the robust back-end of the NASA Exoplanet Archive Periodogram Tool, has a great potential to aid rapid analysis of time series data. Easy access to external image catalogs allows overlaying information across a range of wavelengths, including a very well designed RGB-making toolbox. The Qserv responds to complicated SQL queries, and scisql functions help to execute very quick joins between catalogs to obtain needed information.
There are a number of areas with possible improvements, including aspects of the User Interface, as well as the SQL query engine. First, certain information, such as calibrated fluxes or magnitudes, is only available via Time Series view or a direct SQL query, while a user may be interested in accessing this information more directly. Furthermore, allowing to query any catalog by an objectId, without the need to specify the coordinates, would make it easier to find information for a particular object. Search capabilities will become more versatile once AllSky and Multiple object queries are supported. Ability to select limiting magnitude of the query would aid efficiency, avoiding the necessity to display or download faint, and perhaps not desirable, measurements or objects. We also recommend expanding the educational aspect of the user interface: providing more explanatory metadata about column content, relating the available data to the image processing pipeline, and explaining the details of algorithms used in calculation of calibrated fluxes and magnitudes, as well as Lomb-Scargle periodogram (like~\cite{2017arXiv170309824V}). On the level of the Qserv engine, the error messages currently returned are of degraded quality, and it would be very beneficial to provide a meaningful error to the user interacting with PDAC. This would make it easier to debug the SQL queries, and streamline the data access.
This report outlined a system that is in active development. Current improvements in PDAC based on the user feedback are captured in the epic DM-10432. JIRA tickets specifically resulting from this work include DM-7990, DM-10477, DM-10431, DM-10433, DM-10463, DM-10465, DM-10466.
\appendix
\section{Appendix: SQL queries}
\subsection{Single source cone query, calibrated flux}
\label{sec:sql_single_flux}
\begin{lstlisting}
curl -o calib_lightcurve.json -d 'query= \
SELECT \
id, fsrc.exposure_time_mid , \
scisql_dnToFlux(fsrc.flux_psf, exp.fluxMag0) AS psfFlux, \
scisql_dnToFluxSigma(fsrc.flux_psf, fsrc.flux_psf_err, \
exp.fluxMag0, exp.fluxMag0Sigma) AS psfFluxErr \
FROM \
RunDeepForcedSource AS fsrc, \
Science_Ccd_Exposure AS exp \
WHERE \
exp.scienceCcdExposureId = fsrc.exposure_id \
AND fsrc.exposure_filter_id = 1 \
AND objectId = 216172782516437336 \
ORDER BY exposure_time_mid' \
http://lsst-qserv-dax01.ncsa.illinois.edu:5000/db/v0/tap/sync
\end{lstlisting}
\subsection{Single source cone query, calibrated magnitude}
\label{sec:sql_single_mag}
\begin{lstlisting}
curl -o gmag_lightcurve.json -d 'query=\
SELECT \
objectId, fsrc.exposure_time_mid AS mjd, \
scisql_dnToAbMag(fsrc.flux_psf,exp.fluxMag0) AS g, \
scisql_dnToAbMagSigma(fsrc.flux_psf, fsrc.flux_psf_err,\
exp.fluxMag0, exp.fluxMag0Sigma) AS gErr \
FROM \
RunDeepForcedSource AS fsrc, \
Science_Ccd_Exposure AS exp \
WHERE \
exp.scienceCcdExposureId = fsrc.exposure_id \
AND fsrc.exposure_filter_id=1 \
AND objectId IN (216172782516437336)' \
http://lsst-qserv-dax01.ncsa.illinois.edu:5000/db/v0/tap/sync
\end{lstlisting}
\subsection{Box query, i<23.5}
\label{sec:sql_box}
\begin{lstlisting}
curl -o deepSourceBox.json -d 'query= \
SELECT \
dsrc.parent AS parent, dsrc.id AS id, \
scisql_dnToAbMag(dsrc.flux_psf,coadd.fluxMag0) AS i, \
scisql_dnToAbMagSigma(dsrc.flux_psf, dsrc.flux_psf_err, \
coadd.fluxMag0, coadd.fluxMag0Sigma) AS iErr \
FROM \
RunDeepSource AS dsrc, \
DeepCoadd AS coadd \
WHERE \
qserv_areaspec_box(325,-0.7, 325.1, -0.6)\
AND dsrc.coadd_id = coadd.deepCoaddId \
AND dsrc.coadd_filter_id = 3 \
HAVING i < 23.5 ' \
http://lsst-qserv-dax01.ncsa.illinois.edu:5000/db/v0/tap/sync
\end{lstlisting}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%