Add kb exhibition for unstructured ll

Signed-off-by: SiqiLuo <[email protected]>
kubeedge · Jul 13, 2023 · 1231797 · 1231797
1 parent 3e8de61
commit 1231797
Show file tree

Hide file tree

Showing 6 changed files with 58 additions and 10 deletions.
diff --git a/examples/lifelong_learning/cityscapes/RFNet/interface.py b/examples/lifelong_learning/cityscapes/RFNet/interface.py
@@ -32,6 +32,12 @@
 from estimators.eval import Validator, load_my_state_dict
 from accuracy import accuracy
 
+classes = ["road", "sidewalk", "building", "wall", "fence", "pole", "light",
+           "sign", "vegetation", "terrain", "sky", "pedestrian", "rider",
+           "car", "truck", "bus", "train", "motorcycle", "bicycle", "stair",
+           "curb", "ramp", "runway", "flowerbed", "door", "CCTV camera",
+           "Manhole", "hydrant", "belt", "dustbin", "ignore"]
+
 
 def preprocess_url(image_urls):
     transformed_images = []
@@ -80,6 +86,8 @@ def preprocess_frames(frames):
 
 class Estimator:
     def __init__(self, **kwargs):
+        self.classes = kwargs.get("classes", classes)
+
         self.train_args = TrainingArguments(**kwargs)
         self.val_args = EvaluationArguments(**kwargs)
 

diff --git a/lib/sedna/algorithms/seen_task_learning/task_definition/task_definition_by_origin.py b/lib/sedna/algorithms/seen_task_learning/task_definition/task_definition_by_origin.py
@@ -54,7 +54,7 @@ def __call__(self,
         _sample = BaseDataSource(data_type=d_type)
         _sample.x, _sample.y = _x, _y
 
-        g_attr = f"{self.attribute[0]}.model"
+        g_attr = f"{self.attribute[0]}"
         task_obj = Task(entry=g_attr, samples=_sample,
                         meta_attr=self.attribute[0])
         tasks.append(task_obj)
@@ -65,7 +65,7 @@ def __call__(self,
         _sample = BaseDataSource(data_type=d_type)
         _sample.x, _sample.y = _x, _y
 
-        g_attr = f"{self.attribute[-1]}.model"
+        g_attr = f"{self.attribute[-1]}"
         task_obj = Task(entry=g_attr, samples=_sample,
                         meta_attr=self.attribute[-1])
         tasks.append(task_obj)

diff --git a/lib/sedna/algorithms/unseen_task_processing/unseen_task_processing.py b/lib/sedna/algorithms/unseen_task_processing/unseen_task_processing.py
@@ -40,7 +40,7 @@ class UnseenTaskProcessing:
     '''
 
     def __init__(self, estimator, unseen_task_allocation=None, **kwargs):
-        self.estimator = estimator
+        self.base_model = estimator
         self.unseen_task_allocation = unseen_task_allocation or {
             "method": "UnseenTaskAllocationDefault"
         }
@@ -140,16 +140,16 @@ def predict(self, data, post_process=None, **kwargs):
         tasks : List
             tasks assigned to each sample.
         """
-        if callable(self.estimator):
-            return self.estimator(), []
+        if callable(self.base_model):
+            return self.base_model(), []
 
         if not self.unseen_task_groups and not self.unseen_models:
             self.load(kwargs.get("task_index"))
 
         tasks = []
         res = []
         for inx, df in enumerate(data.x):
-            pred = self.estimator.predict([df])
+            pred = self.base_model.predict([df])
             task = Task(entry=inx, samples=df)
             task.result = pred
             tasks.append(task)

diff --git a/lib/sedna/backend/base.py b/lib/sedna/backend/base.py
@@ -30,6 +30,10 @@ def __init__(self, estimator, fine_tune=True, **kwargs):
         self.model_save_path = kwargs.get("model_save_path") or "/tmp"
         self.default_name = kwargs.get("model_name")
         self.has_load = False
+        try:
+            self.classes = estimator.base_model.classes
+        except AttributeError:
+            self.classes = ""
 
     @property
     def model_name(self):

diff --git a/lib/sedna/core/lifelong_learning/knowledge_management/cloud_knowledge_management.py b/lib/sedna/core/lifelong_learning/knowledge_management/cloud_knowledge_management.py
@@ -37,8 +37,7 @@ def __init__(self, config, seen_estimator, unseen_estimator, **kwargs):
             config, seen_estimator, unseen_estimator)
 
         self.last_task_index = kwargs.get("last_task_index", None)
-        self.cloud_output_url = config.get(
-            "cloud_output_url", "/tmp")
+        self.cloud_output_url = config.get("cloud_output_url")
         self.task_index = FileOps.join_path(
             self.cloud_output_url, config["task_index"])
         self.local_task_index_url = KBResourceConstant.KB_INDEX_NAME.value

diff --git a/lib/sedna/core/lifelong_learning/lifelong_learning.py b/lib/sedna/core/lifelong_learning/lifelong_learning.py
@@ -356,6 +356,8 @@ def update(self, train_data, valid_data=None, post_process=None, **kwargs):
                 task_index,
                 relpath=self.config.data_path_prefix)
 
+        task_info_res = self._generate_taskgrp_info(task_info_res, task_index)
+
         self.report_task_info(
             None, K8sResourceKindStatus.COMPLETED.value, task_info_res)
         self.log.info(f"Lifelong learning Update task Finished, "
@@ -393,7 +395,7 @@ def evaluate(self, data, post_process=None, **kwargs):
                 f"Download kb index from {task_index_url} to {index_url}")
             FileOps.download(task_index_url, index_url)
 
-        res, index_file = self._task_evaluation(
+        res, index_file, task_details = self._task_evaluation(
             data, task_index=index_url, **kwargs)
         self.log.info("Task evaluation finishes.")
 
@@ -404,6 +406,10 @@ def evaluate(self, data, post_process=None, **kwargs):
         task_info_res = self.estimator.model_info(
             self.cloud_knowledge_management.task_index, result=res,
             relpath=self.config.data_path_prefix)
+        task_info_res = self._generate_taskgrp_info(
+            task_info_res,
+            self.cloud_knowledge_management.task_index,
+            task_details=task_details)
         self.report_task_info(
             None,
             K8sResourceKindStatus.COMPLETED.value,
@@ -542,4 +548,35 @@ def _task_evaluation(self, data, **kwargs):
         else:
             self.log.info(f"Deploy {index_file} to the edge.")
 
-        return res, index_file
+        return res, index_file, tasks_detail
+
+    def _generate_taskgrp_info(self, task_info_res, task_index, **kwargs):
+        if isinstance(task_index, str):
+            task_index = FileOps.load(task_index)
+
+        for task_info_dict in task_info_res:
+            task_grps = task_index["seen_task"]["task_groups"]
+            task_info_dict["number_of_model"] = len(task_grps)
+
+            # TODO: obtain unseen samples from storage space,
+            # not from lib
+            task_info_dict["number_of_unseen_sample"] = 0
+            # TODO: obtain labeled unseen samples from storage space,
+            # not from lib
+            task_info_dict["number_of_labeled_unseen_sample"] = \
+                sum(task_grp.samples.num_examples(
+                ) for task_grp in task_grps)
+
+            current_metric = {}
+            tasks_detail = kwargs.get("tasks_detail", None)
+            if tasks_detail:
+                for task in tasks_detail:
+                    current_metric[task.entry] = task.scores
+            else:
+                for task_grp in task_grps:
+                    current_metric[task_grp.model.entry] = \
+                        task_grp.model.result
+            task_info_dict["current_metric"] = current_metric
+
+            task_info_dict["classes"] = self.estimator.classes
+        return task_info_res