-
Notifications
You must be signed in to change notification settings - Fork 0
/
super_pred.py
52 lines (36 loc) · 1.1 KB
/
super_pred.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# This file aims to predict the class label for a given test sample, choosing the class from a set of super classes.
import numpy as np
import pandas as pd
import sklearn as sk
import urllib
from urllib.request import urlopen
from bs4 import BeautifulSoup as bs
import requests
def get_Abstract(paperId, timeout=30):
    """Fetch the abstract text of a PubMed article.

    Downloads ``https://www.ncbi.nlm.nih.gov/pubmed/<paperId>`` once and
    joins the text of every ``abstracttext`` element found in the page.

    Args:
        paperId: PubMed identifier; converted with ``str()`` to build the URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The concatenated abstract text, or ``""`` when the request fails,
        the server does not answer with HTTP 200, or the page contains no
        ``abstracttext`` element.
    """
    url = "https://www.ncbi.nlm.nih.gov/pubmed/" + str(paperId)
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Best effort: one unreachable page must not abort a whole batch.
        print("Page not Exists")
        return ""
    if response.status_code != 200:
        return ""
    # Reuse the body that was already downloaded instead of fetching the
    # same URL a second time, and name the parser explicitly so the result
    # does not depend on which optional parsers happen to be installed.
    soup = bs(response.content, "html.parser")
    # get_text() also handles tags with nested markup, for which `.string`
    # is None and would make str.join raise TypeError.
    return "".join(tag.get_text() for tag in soup.find_all("abstracttext"))
# Load the dataset; the first CSV column is used as the row index.
data = pd.read_csv('Dataset/Heart.csv', index_col=0)
PubMed_nr = data['Pubmed_ID']

# Fetch one abstract per PubMed id, echoing each id as progress output.
# A plain list is used because np.append copies the entire array on every
# call, which makes the loop quadratic in the number of rows.
abstracts = []
for k in PubMed_nr:
    print(k)
    abstracts.append(get_Abstract(k))

# Attach the abstracts as a new column, aligned by position with the rows.
data['abstract'] = abstracts
# index=False: the row index is not written to the output file.
data.to_csv('Heart_abstract.csv', index=False)