From eca55cc565481c22cd79b96bcf09aecc509953b9 Mon Sep 17 00:00:00 2001 From: ncoop57 Date: Sat, 13 Feb 2021 19:23:18 +0000 Subject: [PATCH] Update lib and docs --- docs/cli.html | 308 +++++++++--------------------------------- docs/combo.html | 114 +++++++++++++++- two_to_tango/cli.py | 4 +- two_to_tango/combo.py | 16 ++- two_to_tango/prep.py | 1 + 5 files changed, 185 insertions(+), 258 deletions(-) diff --git a/docs/cli.html b/docs/cli.html index 41095a0..79c654a 100644 --- a/docs/cli.html +++ b/docs/cli.html @@ -95,20 +95,6 @@

download -
- -
- -
-
INFO:root:Downloading and extracting datasets and models to /tf/main/data.
-
-
-
- -
- - {% endraw %} @@ -133,20 +119,19 @@

download
-
VWORDS = [1_000]
-N_IMGS = 15_000
-N_FRAMES_TO_KEEP = [1]
+
# N_IMGS = 15_000
+# N_FRAMES_TO_KEEP = [1]
 FPS = 30
 
-out_path = Path("/tf/main/tango_reproduction_package/outputs")
-art_path = Path("/tf/main/tango_reproduction_package/artifacts")
+out_path = Path("/tf/main/data/output")
+art_path = Path("/tf/main/data/tango_reproduction_package/artifacts")
 vis_model = "SimCLR"
 
-vid_ds = VideoDataset.from_path(
-    art_path/"videos", fr = FPS
-).label_from_paths()
+# vid_ds = VideoDataset.from_path(
+#     art_path/"videos", fr = FPS
+# ).label_from_paths()
 
-_generate_vis_results(vid_ds, out_path, art_path, vis_model)
+# _generate_vis_results(vid_ds, out_path, art_path, vis_model)
 
@@ -171,7 +156,7 @@

download
N_FRAMES_TO_KEEP = [1, 5]
-_generate_txt_results(vid_ds, out_path, art_path, vis_model)
+_generate_txt_results(None, out_path, art_path, vis_model)
 
@@ -192,130 +177,35 @@

download
-
{'CC1': [('U1', 'APOD-CC1-U1'), ('U2', 'APOD-CC1-U2'), ('U12', 'APOD-CC1-U12')],
- 'CC2': [('U1', 'APOD-CC2-U1'), ('U2', 'APOD-CC2-U2'), ('U12', 'APOD-CC2-U12')],
- 'CC3': [('U1', 'APOD-CC3-U1'), ('U2', 'APOD-CC3-U2'), ('U12', 'APOD-CC3-U12')],
- 'CC4': [('U7', 'APOD-CC4-U7'), ('U8', 'APOD-CC4-U8'), ('U12', 'APOD-CC4-U12')],
- 'CC5': [('U7', 'APOD-CC5-U7'), ('U8', 'APOD-CC5-U8'), ('U12', 'APOD-CC5-U12')],
- 'CC6': [('U7', 'APOD-CC6-U7'), ('U8', 'APOD-CC6-U8'), ('U12', 'APOD-CC6-U12')],
- 'CC7': [('U7', 'APOD-CC7-U7'), ('U8', 'APOD-CC7-U8'), ('U12', 'APOD-CC7-U12')],
- 'CC8': [('U5', 'APOD-CC8-U5'),
-         ('U10', 'APOD-CC8-U10'),
-         ('U12', 'APOD-CC8-U12')],
- 'CC9': [('U5', 'APOD-CC9-U5'), ('U9', 'APOD-CC9-U9'), ('U12', 'APOD-CC9-U12')],
- 'RB': [('U1', 'APOD-RB-U1'), ('U5', 'APOD-RB-U5'), ('U12', 'APOD-RB-U12')]}
-{'CC1': [('U6', 'DROID-CC1-U6'),
-         ('U10', 'DROID-CC1-U10'),
-         ('U13', 'DROID-CC1-U13')],
- 'CC11': [('U7', 'DROID-CC11-U7'),
-          ('U10', 'DROID-CC11-U10'),
-          ('U13', 'DROID-CC11-U13')],
- 'CC2': [('U7', 'DROID-CC2-U7'),
-         ('U10', 'DROID-CC2-U10'),
-         ('U13', 'DROID-CC2-U13')],
- 'CC3': [('U6', 'DROID-CC3-U6'),
-         ('U9', 'DROID-CC3-U9'),
-         ('U13', 'DROID-CC3-U13')],
- 'CC4': [('U7', 'DROID-CC4-U7'),
-         ('U10', 'DROID-CC4-U10'),
-         ('U13', 'DROID-CC4-U13')],
- 'CC5': [('U7', 'DROID-CC5-U7'),
-         ('U8', 'DROID-CC5-U8'),
-         ('U13', 'DROID-CC5-U13')],
- 'CC6': [('U7', 'DROID-CC6-U7'),
-         ('U8', 'DROID-CC6-U8'),
-         ('U13', 'DROID-CC6-U13')],
- 'CC7': [('U7', 'DROID-CC7-U7'),
-         ('U8', 'DROID-CC7-U8'),
-         ('U13', 'DROID-CC7-U13')],
- 'CC8': [('U9', 'DROID-CC8-U9'),
-         ('U10', 'DROID-CC8-U10'),
-         ('U13', 'DROID-CC8-U13')],
- 'CC9': [('U9', 'DROID-CC9-U9'),
-         ('U10', 'DROID-CC9-U10'),
-         ('U13', 'DROID-CC9-U13')]}
-{'CC1': [('U2', 'TIME-CC1-U2'), ('U3', 'TIME-CC1-U3'), ('U11', 'TIME-CC1-U11')],
- 'CC2': [('U2', 'TIME-CC2-U2'), ('U3', 'TIME-CC2-U3'), ('U11', 'TIME-CC2-U11')],
- 'CC3': [('U2', 'TIME-CC3-U2'), ('U3', 'TIME-CC3-U3'), ('U11', 'TIME-CC3-U11')],
- 'CC4': [('U9', 'TIME-CC4-U9'),
-         ('U10', 'TIME-CC4-U10'),
-         ('U11', 'TIME-CC4-U11')],
- 'CC5': [('U9', 'TIME-CC5-U9'),
-         ('U10', 'TIME-CC5-U10'),
-         ('U11', 'TIME-CC5-U11')],
- 'CC6': [('U9', 'TIME-CC6-U9'),
-         ('U10', 'TIME-CC6-U10'),
-         ('U11', 'TIME-CC6-U11')],
- 'CC7': [('U9', 'TIME-CC7-U9'),
-         ('U10', 'TIME-CC7-U10'),
-         ('U11', 'TIME-CC7-U11')],
- 'CC8': [('U2', 'TIME-CC8-U2'), ('U3', 'TIME-CC8-U3'), ('U11', 'TIME-CC8-U11')],
- 'CC9': [('U5', 'TIME-CC9-U5'), ('U9', 'TIME-CC9-U9'), ('U11', 'TIME-CC9-U11')],
- 'RC': [('U2', 'TIME-RC-U2'), ('U5', 'TIME-RC-U5'), ('U11', 'TIME-RC-U11')]}
-{'CC1': [('U4', 'GNU-CC1-U4'), ('U5', 'GNU-CC1-U5'), ('U13', 'GNU-CC1-U13')],
- 'CC2': [('U4', 'GNU-CC2-U4'), ('U5', 'GNU-CC2-U5'), ('U13', 'GNU-CC2-U13')],
- 'CC3': [('U4', 'GNU-CC3-U4'), ('U5', 'GNU-CC3-U5'), ('U13', 'GNU-CC3-U13')],
- 'CC4': [('U1', 'GNU-CC4-U1'), ('U3', 'GNU-CC4-U3'), ('U13', 'GNU-CC4-U13')],
- 'CC5': [('U10', 'GNU-CC5-U10'), ('U3', 'GNU-CC5-U3'), ('U13', 'GNU-CC5-U13')],
- 'CC6': [('U1', 'GNU-CC6-U1'), ('U3', 'GNU-CC6-U3'), ('U13', 'GNU-CC6-U13')],
- 'CC7': [('U1', 'GNU-CC7-U1'), ('U3', 'GNU-CC7-U3'), ('U13', 'GNU-CC7-U13')],
- 'CC8': [('U6', 'GNU-CC8-U6'), ('U10', 'GNU-CC8-U10'), ('U13', 'GNU-CC8-U13')],
- 'CC9': [('U6', 'GNU-CC9-U6'), ('U10', 'GNU-CC9-U10'), ('U13', 'GNU-CC9-U13')],
- 'RC': [('U4', 'GNU-RC-U4'), ('U6', 'GNU-RC-U6'), ('U13', 'GNU-RC-U13')]}
-{'CC1': [('U5', 'GROW-CC1-U5'), ('U6', 'GROW-CC1-U6'), ('U14', 'GROW-CC1-U14')],
- 'CC2': [('U10', 'GROW-CC2-U10'),
-         ('U6', 'GROW-CC2-U6'),
-         ('U14', 'GROW-CC2-U14')],
- 'CC3': [('U5', 'GROW-CC3-U5'), ('U6', 'GROW-CC3-U6'), ('U14', 'GROW-CC3-U14')],
- 'CC4': [('U4', 'GROW-CC4-U4'), ('U6', 'GROW-CC4-U6'), ('U14', 'GROW-CC4-U14')],
- 'CC5': [('U4', 'GROW-CC5-U4'), ('U8', 'GROW-CC5-U8'), ('U14', 'GROW-CC5-U14')],
- 'CC6': [('U4', 'GROW-CC6-U4'), ('U7', 'GROW-CC6-U7'), ('U14', 'GROW-CC6-U14')],
- 'CC7': [('U4', 'GROW-CC7-U4'), ('U6', 'GROW-CC7-U6'), ('U14', 'GROW-CC7-U14')],
- 'CC8': [('U7', 'GROW-CC8-U7'), ('U8', 'GROW-CC8-U8'), ('U14', 'GROW-CC8-U14')],
- 'CC9': [('U7', 'GROW-CC9-U7'), ('U8', 'GROW-CC9-U8'), ('U14', 'GROW-CC9-U14')],
- 'RC': [('U5', 'GROW-RC-U5'),
-        ('U7', 'GROW-RC-U7'),
-        ('U8', 'GROW-RC-U8'),
-        ('U14', 'GROW-RC-U14')]}
-{'CC1': [('U3', 'TOK-CC1-U3'), ('U4', 'TOK-CC1-U4'), ('U11', 'TOK-CC1-U11')],
- 'CC2': [('U3', 'TOK-CC2-U3'), ('U4', 'TOK-CC2-U4'), ('U11', 'TOK-CC2-U11')],
- 'CC3': [('U3', 'TOK-CC3-U3'), ('U4', 'TOK-CC3-U4'), ('U11', 'TOK-CC3-U11')],
- 'CC4': [('U1', 'TOK-CC4-U1'), ('U2', 'TOK-CC4-U2'), ('U11', 'TOK-CC4-U11')],
- 'CC5': [('U1', 'TOK-CC5-U1'), ('U2', 'TOK-CC5-U2'), ('U11', 'TOK-CC5-U11')],
- 'CC6': [('U1', 'TOK-CC6-U1'), ('U2', 'TOK-CC6-U2'), ('U11', 'TOK-CC6-U11')],
- 'CC7': [('U1', 'TOK-CC7-U1'), ('U2', 'TOK-CC7-U2'), ('U11', 'TOK-CC7-U11')],
- 'CC8': [('U9', 'TOK-CC8-U9'), ('U10', 'TOK-CC8-U10'), ('U11', 'TOK-CC8-U11')],
- 'CC9': [('U9', 'TOK-CC9-U9'), ('U10', 'TOK-CC9-U10'), ('U11', 'TOK-CC9-U11')],
- 'RB': [('U3', 'TOK-RB-U3'), ('U4', 'TOK-RB-U4'), ('U11', 'TOK-RB-U11')]}
-dict_keys(['lcs', 'weighted_lcs', 'bovw', 'bovw_lcs', 'bovw_weighted_lcs'])
-Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '5ftk', 'technique': 'weighted_lcs'}
+
dict_keys(['lcs', 'weighted_lcs', 'bovw', 'bovw_lcs', 'bovw_weighted_lcs'])
+Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '30fps', 'ftk': '1ftk', 'technique': 'weighted_lcs'}
 Running setting setting2
 dict_keys(['lcs', 'weighted_lcs', 'bovw', 'bovw_lcs', 'bovw_weighted_lcs'])
-Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '5ftk', 'technique': 'bovw'}
+Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '30fps', 'ftk': '1ftk', 'technique': 'bovw'}
 Running setting setting2
 dict_keys(['lcs', 'weighted_lcs', 'bovw', 'bovw_lcs', 'bovw_weighted_lcs'])
-Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '5ftk', 'technique': 'lcs'}
+Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '30fps', 'ftk': '1ftk', 'technique': 'lcs'}
 Running setting setting2
 dict_keys(['lcs', 'weighted_lcs', 'bovw', 'bovw_lcs', 'bovw_weighted_lcs'])
-Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '5ftk', 'technique': 'bovw_lcs'}
+Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '30fps', 'ftk': '1ftk', 'technique': 'bovw_lcs'}
 Running setting setting2
 dict_keys(['lcs', 'weighted_lcs', 'bovw', 'bovw_lcs', 'bovw_weighted_lcs'])
-Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '5ftk', 'technique': 'bovw_weighted_lcs'}
+Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '30fps', 'ftk': '1ftk', 'technique': 'bovw_weighted_lcs'}
 Running setting setting2
 dict_keys(['lcs', 'weighted_lcs', 'bovw', 'bovw_lcs', 'bovw_weighted_lcs'])
-Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '1ftk', 'technique': 'weighted_lcs'}
+Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '30fps', 'ftk': '5ftk', 'technique': 'weighted_lcs'}
 Running setting setting2
 dict_keys(['lcs', 'weighted_lcs', 'bovw', 'bovw_lcs', 'bovw_weighted_lcs'])
-Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '1ftk', 'technique': 'bovw'}
+Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '30fps', 'ftk': '5ftk', 'technique': 'bovw'}
 Running setting setting2
 dict_keys(['lcs', 'weighted_lcs', 'bovw', 'bovw_lcs', 'bovw_weighted_lcs'])
-Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '1ftk', 'technique': 'lcs'}
+Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '30fps', 'ftk': '5ftk', 'technique': 'lcs'}
 Running setting setting2
 dict_keys(['lcs', 'weighted_lcs', 'bovw', 'bovw_lcs', 'bovw_weighted_lcs'])
-Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '1ftk', 'technique': 'bovw_lcs'}
+Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '30fps', 'ftk': '5ftk', 'technique': 'bovw_lcs'}
 Running setting setting2
 dict_keys(['lcs', 'weighted_lcs', 'bovw', 'bovw_lcs', 'bovw_weighted_lcs'])
-Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '1ftk', 'technique': 'bovw_weighted_lcs'}
+Running config:  {'model': 'SimCLR', 'vwords': '1000vw', 'fps': '30fps', 'ftk': '5ftk', 'technique': 'bovw_weighted_lcs'}
 Running setting setting2
 Writing results and rankings
 done
@@ -336,7 +226,9 @@ 

download
-
combo_out_path = out_path/"combined"
+
out_path = Path("/tf/main/data/output")
+
+combo_out_path = out_path/"combined"
 dl_ranking_path = out_path/"user_rankings_weighted_all"/"all_rankings.csv"
 txt_path = art_path/"models"/"OCR+IR"
 ir_rankings_path = txt_path/"tango_txt_rankings"/"all_rankings.json"
@@ -354,18 +246,30 @@ 

download -
-
('SimCLR', '1000vw', '5ftk', 'bovw') ('ocr+ir', '1', 'all_text')
-('SimCLR', '1000vw', '5ftk', 'bovw') ('ocr+ir', '5', 'all_text')
-('SimCLR', '1000vw', '5ftk', 'bovw') ('ocr+ir', '5', 'unique_frames')
-('SimCLR', '1000vw', '5ftk', 'bovw') ('ocr+ir', '5', 'unique_words')
-('SimCLR', '1000vw', '5ftk', 'bovw_weighted_lcs') ('ocr+ir', '1', 'all_text')
-('SimCLR', '1000vw', '5ftk', 'bovw_weighted_lcs') ('ocr+ir', '5', 'all_text')
-('SimCLR', '1000vw', '5ftk', 'bovw_weighted_lcs') ('ocr+ir', '5', 'unique_frames')
-('SimCLR', '1000vw', '5ftk', 'bovw_weighted_lcs') ('ocr+ir', '5', 'unique_words')
---- 269.1505823135376 seconds ---
-Writing data
-
+
+
+---------------------------------------------------------------------------
+FileNotFoundError                         Traceback (most recent call last)
+<ipython-input-29-53e994fd6274> in <module>
+      7 settings_path = out_path/"evaluation_settings"
+      8 
+----> 9 tango_combined(combo_out_path, dl_ranking_path, ir_rankings_path, settings_path, BEST_DL_MODELS, BEST_IR_MODELS)
+
+~tf/main/two_to_tango/combo.py in tango_combined(out_path, dl_rankings_path, ir_rankings_path, settings_path, dl_models, ir_models)
+    208                                                                     rec['technique'],))
+    209 
+--> 210     ir_rankings = read_json(ir_rankings_path)
+    211     ir_rankings_by_config = group_dict(ir_rankings, lambda rec: (rec['model'], rec['fps'],
+    212                                                                     rec['technique'],))
+
+~tf/main/two_to_tango/utils.py in read_json(file_path)
+     37 
+     38 def read_json(file_path):
+---> 39     with open(file_path) as file:
+     40         return json.load(file)
+     41 
+
+FileNotFoundError: [Errno 2] No such file or directory: '/tf/main/data/tango_reproduction_package/artifacts/models/OCR+IR/tango_txt_rankings/all_rankings.json'
@@ -397,7 +301,7 @@

download - -
-
-
        Model: M01-1000vw-5ftk-bovw,ocr+ir-5-all_text,weight=0.2-0
-        Overall mRR: 0.7978683662016994
-        Overall mAP: 0.7324409215612918
-        Overall Mean Rank: 1.733127572016461
+
        Model: SimCLR-1000vw-5ftk-bovw,ocr+ir-5ftk-all_text,weight=0.2-0
+        Overall mRR: 0.7521122972820503
+        Overall mAP: 0.6843871569103048
+        Overall Mean Rank: 1.9419753086419753
         
-        Model: M01-1000vw-5ftk-bovw
-        Overall mRR: 0.7565689857455435
-        Overall mAP: 0.6790426129261438
-        Overall Mean Rank: 1.9712166172106824
+        Model: SimCLR-1000vw-5ftk-bovw
+        Overall mRR: 0.708187709862351
+        Overall mAP: 0.6392624653390201
+        Overall Mean Rank: 2.105794790005316
         
-
-
-
- -
- -
-
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:27: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
-
-
-
- -
@@ -478,7 +361,7 @@

download -

reproduce[source]

reproduce(down_path:"The directory where all the files will be downloaded and extracted to.", out_path:"The output path to place all results in.", vis_model:"The type of visual model. Can be either SimCLR or SIFT, taking ~6 hours or >2 weeks, respectively, for all apps on our machine with 755G of RAM and 72 CPUs.")

+

reproduce[source]

reproduce(down_path:"The directory where all the files will be downloaded and extracted to.", out_path:"The output path to place all results in.", vis_model:"The type of visual model. Can be either SimCLR or SIFT, taking ~6 hours or >2 weeks, respectively, for all apps on our machine with 755G of RAM and 72 CPUs.")

Function for reproducing all results related to this tool's paper

@@ -522,22 +405,6 @@

reproduce -
- -
- -
-
INFO:root:Downloading and extracting datasets and models to /tf/main/data.
-INFO:root:Loading videos.
-INFO:root:Computing rankings and calculating metrics for SimCLR visual model.
-
-
-
- -
-

-
{% endraw %} @@ -552,7 +419,7 @@

reproduce -

tango[source]

tango(q_path:"Path to the query video", cor_path:"Path to the corpus", simclr_path:"Path to the SimCLR model directory")

+

tango[source]

tango(q_path:"Path to the query video", cor_path:"Path to the corpus", simclr_path:"Path to the SimCLR model directory")

Function for calculating similarity scores of a corpus of video-based bug reports to a query video-based bug report. Currently only uses the top @@ -593,51 +460,6 @@

tango -
- -
- -
-
-
-OrderedDict([   (('APOD', 'CC1', 'U1'), 0.9838350837260246),
-                (('APOD', 'CC1', 'U12'), 0.9193482983504456),
-                (('APOD', 'CC1', 'U2'), 0.3723964243572911),
-                (('APOD', 'CC6', 'U12'), 0.3718521026630344),
-                (('APOD', 'CC9', 'U12'), 0.36803837161265085),
-                (('APOD', 'CC6', 'U8'), 0.33589710905277315),
-                (('APOD', 'CC4', 'U12'), 0.3118613303188616),
-                (('APOD', 'CC9', 'U5'), 0.2718403622668689),
-                (('APOD', 'CC4', 'U7'), 0.25082093055745974),
-                (('APOD', 'CC9', 'U9'), 0.22580393621884165),
-                (('APOD', 'CC5', 'U12'), 0.1768510685792533),
-                (('APOD', 'CC6', 'U7'), 0.1682816804179776),
-                (('APOD', 'CC4', 'U8'), 0.13915926428362999),
-                (('APOD', 'CC2', 'U2'), 0.1354447367818957),
-                (('APOD', 'CC3', 'U2'), 0.12004454785432789),
-                (('APOD', 'CC5', 'U7'), 0.11235793525631509),
-                (('APOD', 'CC5', 'U8'), 0.10867946897348428),
-                (('APOD', 'CC7', 'U8'), 0.09014217805772731),
-                (('APOD', 'RB', 'U1'), 0.08324154319710894),
-                (('APOD', 'CC8', 'U12'), 0.08179046960502091),
-                (('APOD', 'CC8', 'U5'), 0.07290831091450554),
-                (('APOD', 'RB', 'U5'), 0.07194441953180176),
-                (('APOD', 'CC3', 'U12'), 0.06729098674201965),
-                (('APOD', 'CC7', 'U7'), 0.06326635817907807),
-                (('APOD', 'CC7', 'U12'), 0.05922061313241868),
-                (('APOD', 'CC8', 'U10'), 0.05328420969145727),
-                (('APOD', 'CC2', 'U12'), 0.04707548776015297),
-                (('APOD', 'CC3', 'U1'), 0.04342630487280919),
-                (('APOD', 'CC2', 'U1'), 0.04211602057931267),
-                (('APOD', 'RB', 'U12'), 0.029766244020504186)])
-
-
-
- -
-

-

{% endraw %} diff --git a/docs/combo.html b/docs/combo.html index 37c543c..4c419ac 100644 --- a/docs/combo.html +++ b/docs/combo.html @@ -202,7 +202,7 @@

convert_results_format<
-

get_info_to_ranking_results[source]

get_info_to_ranking_results(ranking, ranking_results, run, dl_model, ir_model, weight_str, setting)

+

get_info_to_ranking_results[source]

get_info_to_ranking_results(ranking, ranking_results, run, dl_model, ir_model, weight_str, setting)

@@ -233,7 +233,7 @@

get_info_to_rankin
-

tango_combined[source]

tango_combined(out_path, dl_rankings_path, ir_rankings_path, settings_path, dl_models, ir_models)

+

tango_combined[source]

tango_combined(out_path, dl_rankings_path, ir_rankings_path, settings_path, dl_models, ir_models)

@@ -260,11 +260,10 @@

tango_combined + {% endraw %} + + {% raw %} + +
+
+ +
+
+
out_path = Path("/tf/main/data/output")
+art_path = Path("/tf/main/data/downloads/tango_reproduction_package/artifacts")
+
+combo_out_path = out_path/"combined"
+dl_ranking_path = out_path/"user_rankings_weighted_all"/"all_rankings.csv"
+txt_path = art_path/"models"/"OCR+IR"
+ir_rankings_path = txt_path/"tango_txt_rankings"/"all_rankings.json"
+settings_path = out_path/"evaluation_settings"
+
+tango_combined(combo_out_path, dl_ranking_path, ir_rankings_path, settings_path, BEST_DL_MODELS, BEST_IR_MODELS)
+
+ +
+
+
+ +
+
+ +
+ +
+
('SimCLR', '1000vw', '5ftk', 'bovw') ('ocr+ir', '1ftk', 'all_text')
+('SimCLR', '1000vw', '5ftk', 'bovw') ('ocr+ir', '5ftk', 'all_text')
+('SimCLR', '1000vw', '5ftk', 'bovw') ('ocr+ir', '5ftk', 'unique_frames')
+('SimCLR', '1000vw', '5ftk', 'bovw') ('ocr+ir', '5ftk', 'unique_words')
+('SimCLR', '1000vw', '5ftk', 'bovw_weighted_lcs') ('ocr+ir', '1ftk', 'all_text')
+('SimCLR', '1000vw', '5ftk', 'bovw_weighted_lcs') ('ocr+ir', '5ftk', 'all_text')
+('SimCLR', '1000vw', '5ftk', 'bovw_weighted_lcs') ('ocr+ir', '5ftk', 'unique_frames')
+('SimCLR', '1000vw', '5ftk', 'bovw_weighted_lcs') ('ocr+ir', '5ftk', 'unique_words')
+--- 199.70875334739685 seconds ---
+Writing data
+
+
+
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+
+
{% endraw %} diff --git a/two_to_tango/cli.py b/two_to_tango/cli.py index 86ad397..fc17390 100644 --- a/two_to_tango/cli.py +++ b/two_to_tango/cli.py @@ -77,13 +77,12 @@ def download( BEST_MODEL_CONFIGS = { "SimCLR": "SimCLR-1000vw-5ftk-bovw", "SIFT": "SIFT-10000vw-1ftk-bovw_weighted_lcs", - "OCR+IR": "ocr+ir-5-all_text" + "OCR+IR": "ocr+ir-5ftk-all_text" } # Cell def _generate_vis_results(vid_ds, out_path, art_path, vis_model): if vis_model == "SimCLR": - vis_model = "M01" simclr = SimCLRModel.load_from_checkpoint( checkpoint_path = str( art_path/"models"/"SimCLR"/"checkpointepoch=98.ckpt" @@ -92,7 +91,6 @@ def _generate_vis_results(vid_ds, out_path, art_path, vis_model): model = SimCLRExtractor(simclr) sim_func = simclr_frame_sim else: - vis_model = "M00" model = SIFTExtractor(cv2.xfeatures2d.SIFT_create(nfeatures = 10)) sim_func = sift_frame_sim diff --git a/two_to_tango/combo.py b/two_to_tango/combo.py index 02aea2c..82c4199 100644 --- a/two_to_tango/combo.py +++ b/two_to_tango/combo.py @@ -116,6 +116,7 @@ def convert_results_format(sim_path, settings_path, out_path, models): vwords = file_tokens[3] frames_per_sec = file_tokens[4] + ftk = file_tokens[5] model_similarities = pickle.load(open(sim_file, 'rb')) @@ -126,6 +127,7 @@ def convert_results_format(sim_path, settings_path, out_path, models): "model": model, "vwords": vwords, "fps": frames_per_sec, + "ftk": ftk, "technique": technique } @@ -177,7 +179,10 @@ def get_info_to_ranking_results(ranking, ranking_results, run, dl_model, ir_mode return ranking_info, ranking_results # Cell -def tango_combined(out_path, dl_rankings_path, ir_rankings_path, settings_path, dl_models, ir_models): +def tango_combined( + out_path, dl_rankings_path, ir_rankings_path, + settings_path, dl_models, ir_models +): # all_data results_out_path = out_path/"tango_comb_results" rankings_out_path = out_path/"tango_comb_rankings" @@ -199,11 +204,11 @@ def tango_combined(out_path, dl_rankings_path, ir_rankings_path, settings_path, settings = load_settings(settings_path) dl_rankings = read_json_line_by_line(dl_rankings_path) - dl_rankings_by_config = group_dict(dl_rankings, lambda rec: (rec['model'], rec['vwords'], rec['fps'], + dl_rankings_by_config = group_dict(dl_rankings, lambda rec: (rec['model'], rec['vwords'], rec['ftk'], rec['technique'],)) ir_rankings = read_json(ir_rankings_path) - ir_rankings_by_config = group_dict(ir_rankings, lambda rec: (rec['model'], rec['fps'], + ir_rankings_by_config = group_dict(ir_rankings, lambda rec: (rec['model'], rec['fps'] + "ftk", rec['technique'],)) # best_dl_models = [ @@ -226,9 +231,8 @@ def tango_combined(out_path, dl_rankings_path, ir_rankings_path, settings_path, dl_models = list(filter(lambda rec: "-".join([rec[0], rec[1], rec[2], rec[3]]) in dl_models, dl_rankings_by_config.keys())) - ir_models = list(filter(lambda rec: "-".join([rec[0], "", rec[1] + "ftk", rec[2]]) in ir_models, + ir_models = list(filter(lambda rec: "-".join([rec[0], rec[1], rec[2]]) in ir_models, ir_rankings_by_config.keys())) - # run combinations start_time = time.time() @@ -242,7 +246,7 @@ def tango_combined(out_path, dl_rankings_path, ir_rankings_path, settings_path, print(dl_model, ir_model) - app_for_comb = ir_model_apps_for_comb["-".join([ir_model[1] + "ftk", ir_model[2]])] + app_for_comb = ir_model_apps_for_comb["-".join([ir_model[1], ir_model[2]])] for setting in settings_to_run: dl_runs = group_dict(dl_mod_rankings[setting], lambda rec: rec["run_id"]) diff --git a/two_to_tango/prep.py b/two_to_tango/prep.py index fe5676a..7c52511 100644 --- a/two_to_tango/prep.py +++ b/two_to_tango/prep.py @@ -275,6 +275,7 @@ def get_all_texts(vid_ds, out_path, fps): frames_text = sorted(frames_text, key=lambda t: t["f"]) + video_name = video_name.replace("_fixed_30", "") out_file = os.path.join(video_output_path, video_name + '.json') write_json_line_by_line(frames_text, out_file)