Skip to content

Commit

Permalink
Adjust analysis scripts to new database schema
Browse files Browse the repository at this point in the history
  • Loading branch information
dennis-tra committed Sep 27, 2021
1 parent 517d0bd commit 8fd062d
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 21 deletions.
29 changes: 25 additions & 4 deletions analysis/mixed/lib/node_agent.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,35 @@
# get_agent_version gets the agent version info of given peers.
# It takes an sql connection and the peer **database** ids as arguments, and
# returns a dict that maps each peer database id to its agent version.
# Peers that have no matching row in agent_versions are absent from the
# result because the query uses an INNER JOIN. If peer_ids is empty, an
# empty dict is returned without querying the database.
def get_agent_version(conn, peer_ids):
    ids = tuple(peer_ids)
    if not ids:
        # An empty "IN ()" list is rejected by many databases
        # (e.g. PostgreSQL), so skip the query entirely.
        return {}

    # One "%s" placeholder per id; the id values themselves are bound by
    # the database driver, not interpolated into the SQL text.
    placeholders = ','.join(['%s'] * len(ids))
    cur = conn.cursor()
    cur.execute(
        """
        SELECT p.id, agent_version
        FROM peers p
        INNER JOIN agent_versions av on av.id = p.agent_version_id
        WHERE p.id IN (%s)
        """ % placeholders,
        ids
    )
    return {peer_id: agent for peer_id, agent in cur.fetchall()}


# get_agent_version_multi_hash gets the agent version info of given peers.
# It takes an sql connection, the peer ID multi hashes as arguments, and
# returns the agent version info of these peer ids.
def get_agent_version_multi_hash(conn, peer_ids):
cur = conn.cursor()
res = dict()
cur.execute(
"""
SELECT p.id, agent_version
FROM peers p
INNER JOIN agent_versions av on av.id = p.agent_version_id
WHERE p.multi_hash IN (%s)
""" % ','.join(['%s'] * len(peer_ids)),
tuple(peer_ids)
)
Expand Down
4 changes: 2 additions & 2 deletions analysis/mixed/lib/node_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def get_on_nodes(conn, start, end):
WHERE (created_at < %s AND updated_at > %s AND first_successful_dial != last_successful_dial) OR (created_at < %s AND finished = false) AND peer_id NOT IN (
SELECT peer_id
FROM sessions
WHERE updated_at > %s AND updated_At < %s AND finished = true
WHERE updated_at > %s AND updated_at < %s AND finished = true
)
""",
[end, end, end, start, end]
Expand All @@ -55,7 +55,7 @@ def get_off_nodes(conn, start, end):
WHERE created_at < %s AND updated_at > %s AND first_successful_dial = last_successful_dial AND finished = true AND peer_id NOT IN (
SELECT peer_id
FROM sessions
WHERE updated_at > %s AND updated_At < %s AND first_successful_dial != last_successful_dial AND finished = true
WHERE updated_at > %s AND updated_at < %s AND first_successful_dial != last_successful_dial AND finished = true
)
""",
[end, start, start, end]
Expand Down
12 changes: 7 additions & 5 deletions analysis/mixed/lib/node_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,18 @@


# get_cloud gets the cloud info of given peers.
# It takes an sql connection, the peer ids as arguments, and
# It takes an sql connection, the peer **database** ids as arguments, and
# returns the cloud info of these peer ids.
def get_cloud(conn, peer_ids):
cur = conn.cursor()
res = dict()
cur.execute(
"""
SELECT id, multi_addresses
FROM peers
WHERE id IN (%s)
SELECT p.id, ma.maddr
FROM peers p
INNER JOIN peers_x_multi_addresses pxma on p.id = pxma.peer_id
INNER JOIN multi_addresses ma on pxma.multi_address_id = ma.id
WHERE p.id IN (%s)
""" % ','.join(['%s'] * len(peer_ids)),
tuple(peer_ids)
)
Expand All @@ -31,7 +33,7 @@ def get_cloud(conn, peer_ids):
page = requests.get(azure_url)
tree = html.fromstring(page.content)
download_url = tree.xpath("//a[contains(@class, 'failoverLink') and "
"contains(@href,'download.microsoft.com/download/')]/@href")[0]
"contains(@href,'download.microsoft.com/download/')]/@href")[0]
azure_ips = requests.get(download_url, allow_redirects=True).json()
azure_prefixes = set()
for item in azure_ips["values"]:
Expand Down
8 changes: 5 additions & 3 deletions analysis/mixed/lib/node_geolocation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@ def get_geolocation(conn, peer_ids):
res = dict()
cur.execute(
"""
SELECT id, multi_addresses
FROM peers
WHERE id IN (%s)
SELECT p.id, ma.maddr
FROM peers p
INNER JOIN peers_x_multi_addresses pxma on p.id = pxma.peer_id
INNER JOIN multi_addresses ma on pxma.multi_address_id = ma.id
WHERE p.id IN (%s)
""" % ','.join(['%s'] * len(peer_ids)),
tuple(peer_ids)
)
Expand Down
19 changes: 19 additions & 0 deletions analysis/mixed/lib/node_multi_addresses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# get_multi_addresses gets the multi addresses of the given peers.
# It takes an sql connection and the peer **database** ids as arguments, and
# returns a dict that maps each peer database id to a multi address.
# NOTE(review): the query goes through the peers_x_multi_addresses
# association table, so a peer may yield several rows; only the multi
# address of the last fetched row per peer is kept - confirm this is intended.
# If peer_ids is empty, an empty dict is returned without querying the
# database.
def get_multi_addresses(conn, peer_ids):
    ids = tuple(peer_ids)
    if not ids:
        # An empty "IN ()" list is rejected by many databases
        # (e.g. PostgreSQL), so skip the query entirely.
        return {}

    # One "%s" placeholder per id; the id values themselves are bound by
    # the database driver, not interpolated into the SQL text.
    placeholders = ','.join(['%s'] * len(ids))
    cur = conn.cursor()
    cur.execute(
        """
        SELECT p.id, ma.maddr
        FROM peers p
        INNER JOIN peers_x_multi_addresses pxma on p.id = pxma.peer_id
        INNER JOIN multi_addresses ma on pxma.multi_address_id = ma.id
        WHERE p.id IN (%s)
        """ % placeholders,
        ids
    )
    res = dict()
    for peer_id, maddr in cur.fetchall():
        # Later rows for the same peer overwrite earlier ones.
        res[peer_id] = maddr
    return res
8 changes: 5 additions & 3 deletions analysis/mixed/lib/node_ping.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@ async def check_node_ping_async(conn, peer_ids):
res = dict()
cur.execute(
"""
SELECT id, multi_addresses
FROM peers
WHERE id IN (%s)
SELECT p.id, ma.maddr
FROM peers p
INNER JOIN peers_x_multi_addresses pxma on p.id = pxma.peer_id
INNER JOIN multi_addresses ma on pxma.multi_address_id = ma.id
WHERE p.id IN (%s)
""" % ','.join(['%s'] * len(peer_ids)),
tuple(peer_ids)
)
Expand Down
9 changes: 6 additions & 3 deletions analysis/mixed/lib/node_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@ def get_agent_protocol(conn, peer_ids):
res = dict()
cur.execute(
"""
SELECT id, protocol
FROM peers
WHERE id IN (%s)
SELECT sq.id, array_agg(prot.protocol)
FROM protocols prot INNER JOIN (
SELECT p.id, unnest(ps.protocol_ids) protocol_id
FROM peers p INNER JOIN protocols_sets ps ON ps.id = p.protocols_set_id
WHERE p.id IN (%s)
) AS sq ON sq.protocol_id = prot.id GROUP BY 1
""" % ','.join(['%s'] * len(peer_ids)),
tuple(peer_ids)
)
Expand Down
2 changes: 1 addition & 1 deletion analysis/mixed/plot_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@
labels=["off nodes %d" % len(off), "on nodes %d" % len(on), "dangling nodes %d" % len(dangle)],
autopct="%.1f%%")
plt.title("Node classification from %s to %s" % (start.replace(microsecond=0), end.replace(microsecond=0)))
plt.show()
plt.show()

0 comments on commit 8fd062d

Please sign in to comment.