forked from Liyubov/citizen_science
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcitizen_science_data_analysis.py
146 lines (89 loc) · 4.38 KB
/
citizen_science_data_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python
# coding: utf-8
# ## Observations from the Open City Nature Challenge
#
# RQ1
# Can we identify macroscopic laws of citizen science projects that explain why their long-term dynamics differ?
#
# RQ2
# How do communities grow over time, and what drives their growth if not preferential attachment or exogenous factors?
#
# ## Data
# Muki downloaded all the CNC observations for the bounding box of Europe (metadata below) that includes the geographical locations - it's 62K observations.
#
# Query quality_grade=any&identifications=any&swlat=35.327868&swlng=-15.438348&nelat=61.352386&nelng=32.898351&projects[]=city-nature-challenge-2019
# Columns id, observed_on_string, observed_on, time_observed_at, time_zone, out_of_range, user_id, user_login, created_at, updated_at, quality_grade, license, url, image_url, sound_url, tag_list, description, id_please, num_identification_agreements, num_identification_disagreements, captive_cultivated, oauth_application_id, place_guess, latitude, longitude, positional_accuracy, geoprivacy, taxon_geoprivacy, coordinates_obscured, positioning_method, positioning_device, species_guess, scientific_name, common_name, iconic_taxon_name, taxon_id, taxon_kingdom_name, taxon_phylum_name, taxon_subphylum_name, taxon_superclass_name, taxon_class_name, taxon_subclass_name, taxon_superorder_name, taxon_order_name, taxon_suborder_name, taxon_superfamily_name, taxon_family_name, taxon_subfamily_name, taxon_supertribe_name, taxon_tribe_name, taxon_subtribe_name, taxon_genus_name, taxon_genushybrid_name, taxon_species_name, taxon_hybrid_name, taxon_subspecies_name, taxon_variety_name, taxon_form_name
#
# We focus on data from the UK, and in particular on London (51.5074° N, 0.1278° W).
# Shape of the data is 62246, 58.
# In[1]:
import plotly.plotly as py
import plotly.graph_objs as go
import pandas as pd
import csv
# Read the exported observation table into a DataFrame (large file, slow to load).
observations_csv = 'C:/Users/lyubo/Documents/DATA_networks/data_citizen_science/observations-65163_space.csv'
df_cit_sci = pd.read_csv(filepath_or_buffer=observations_csv)
# Report the table dimensions as a (rows, columns) tuple.
print(df_cit_sci.shape)
# In[2]:
# Preview the first ten rows (only rendered when run as a notebook cell).
df_cit_sci.head(n=10)
# In[2]:
# List the column labels available for the analysis.
print(df_cit_sci.columns)
# ### Plot data about participants and findings on a map
#
# From 62246 unique records of users we plot their distribution on a map.
# In[9]:
import math
import matplotlib.pyplot as plt
import seaborn
from mpl_toolkits.basemap import Basemap
# Draw every observation as a small red dot on a Basemap relief map.
# A large square canvas is requested for the figure.
fig, ax = plt.subplots(figsize=(60, 60))
# Mercator projection clipped to latitudes 49..59 and longitudes -8.5..3
# (roughly Great Britain and its surroundings, in line with the UK focus
# stated in the file header); resolution='c' is Basemap's crude boundary set.
m = Basemap(
    projection='merc',
    llcrnrlat=49,
    urcrnrlat=59,
    llcrnrlon=-8.5,
    urcrnrlon=3,
    lat_ts=20,
    resolution='c',
)
# Background layers: shaded-relief image, then coastlines and the map frame.
m.shadedrelief()  # m.bluemarble() is the alternative NASA backdrop
m.drawcoastlines()
m.drawmapboundary()
# Project geographic coordinates into map coordinates. Calling the Basemap
# instance takes (longitude, latitude) in that order and returns (x, y).
lon_values = list(df_cit_sci["longitude"].astype(float))
lat_values = list(df_cit_sci["latitude"].astype(float))
x, y = m(lon_values, lat_values)
# Scatter the projected points with marker size 1.
m.scatter(x, y, 1, marker='o', color='red')
# Render the figure.
plt.show()
#
# # Plot points of data on a map with geometry
# Using osmnx module we want to plot trajectories on a map
#
# Work in progress on
# 1. visualisation of the spatial patterns over time
# 2. analysis of communities formations and population of the platform over time
# In[3]:
import folium
# Collect the observation coordinates as (latitude, longitude) pairs for folium.
latitude = df_cit_sci.latitude.values
longitude = df_cit_sci.longitude.values
# Skip observations with a missing coordinate: folium rejects NaN locations
# with a ValueError, so a single incomplete row would abort the map cells
# that consume latlon. The raw latitude/longitude arrays are left untouched.
has_coords = ~(pd.isna(latitude) | pd.isna(longitude))
# Pair the remaining values in (lat, lon) order, the order folium expects.
latlon = list(zip(latitude[has_coords], longitude[has_coords]))
#print(latlon)
# In[ ]:
# Centre the map on London: latitude 51.5074° N, longitude 0.1278° W.
# Western longitudes are negative, so the centre is -0.1278 (a positive value
# would place it east of the prime meridian).
mapit = folium.Map(location=[51.5074, -0.1278])  # , zoom_start=6
# Draw one filled circle per observation. radius / fill_color are CircleMarker
# styling options; the plain pin Marker does not define them.
for lat, lon in latlon:
    folium.CircleMarker(
        location=[lat, lon],
        radius=8,
        fill=True,
        fill_color='#43d9de',
    ).add_to(mapit)
# Bare expression: displays the map when this runs as a notebook cell.
mapit
# Connect the locations stored in latlon, in list order, with a single red line.
# TODO:
#   - add information to nodes of the map with added info about cities;
#   - plot curved edges multigraph (the original note read "ages"; presumably "edges").
SAF = folium.PolyLine(
    locations=latlon,
    color='red',
    weight=5,
)
# add_child attaches the line to the existing map object.
mapit.add_child(SAF)
# In[ ]: