Add an optional `freq_weight` flag to `discretize` (tgx/classes/graph.py) and
`discretize_edges` (tgx/utils/graph_utils.py).

Each discretized time bin now maps edge -> weight (a dict) instead of holding
a list of edges. With freq_weight=True the weight counts how many times the
edge falls into the bin; otherwise every edge in the bin has weight 1.

diff --git a/tgx/classes/graph.py b/tgx/classes/graph.py
index 9ec45e6..5abd84d 100644
--- a/tgx/classes/graph.py
+++ b/tgx/classes/graph.py
@@ -39,17 +39,21 @@ def __init__(self,
 
     def discretize(self,
                    time_scale: Union[str, int],
-                   store_unix: bool = False) -> object:
+                   store_unix: bool = False,
+                   freq_weight: bool = False) -> object:
         """
         discretize the graph object based on the given time interval
         Args:
             time_scale: time interval to discretize the graph
+            store_unix: whether to store converted unix time in a list
+            freq_weight: whether to weight the edges by frequency in the new graph object
         """
         new_G = copy.deepcopy(self)
         # discretie differently based on # of intervals of time granularity
         output = discretize_edges(self.data,
                                 time_scale = time_scale,
-                                store_unix = store_unix)
+                                store_unix = store_unix,
+                                freq_weight = freq_weight)
         disc_G = output[0]
         new_G.data = disc_G
         if (store_unix):
@@ -181,8 +185,7 @@ def save2csv(self,
             for t, edges_list in self.data.items():
                 for edge in edges_list:
                     (u, v) = edge
-                    csvwriter.writerow([t] + [u] + [v])
-
+                    csvwriter.writerow([t] + [u] + [v])
 
     # def _generate_graph(self,
     #                     edgelist: Optional[dict] = None
diff --git a/tgx/utils/graph_utils.py b/tgx/utils/graph_utils.py
index 4264c4b..b9b9023 100644
--- a/tgx/utils/graph_utils.py
+++ b/tgx/utils/graph_utils.py
@@ -24,7 +24,8 @@ def ceiling_division(n, d):
 
 def discretize_edges(edgelist: dict,
                     time_scale: Union[int,str],
-                    store_unix: Optional[bool] = False) -> list:
+                    store_unix: Optional[bool] = False,
+                    freq_weight: Optional[bool] = False) -> list:
     """
     util function for discretizing edgelist, expected timestamp on edges are unixtimestamp
     this func supports discretization of edge timestamp
@@ -34,6 +35,7 @@ def discretize_edges(edgelist: dict,
         edgelist: dict, dictionary of edges
         time_scale: int or str, time interval to discretize the graph
         store_unix: bool, whether to return the converted timestamps in unix format
+        freq_weight: bool, whether to weight the edges based on their frequency
     Returns:
         output list: the first item in the list is always the updated edgelist (dict, dictionary of edges with discretized timestamps) and the second item is the converted timestamps in unix format (list) if store_unix is True
     """
@@ -73,9 +75,17 @@ def discretize_edges(edgelist: dict,
 
         for edge in edges_list:
             if bin_ts not in updated_edgelist:
-                updated_edgelist[bin_ts] = [edge]
+                updated_edgelist[bin_ts] = {edge: 1}
+                #updated_edgelist[bin_ts] = [edge]
             else:
-                updated_edgelist[bin_ts].append(edge)
+                if (not freq_weight):
+                    updated_edgelist[bin_ts][edge] = 1
+                else:
+                    if (edge in updated_edgelist[bin_ts]):
+                        updated_edgelist[bin_ts][edge] += 1
+                    else:
+                        updated_edgelist[bin_ts][edge] = 1
+                #updated_edgelist[bin_ts].append(edge)
 
         if (store_unix):
             unix_ts = start_time + int(ts // interval_size) * interval_size #round to the nearest start time