Skip to content
This repository has been archived by the owner on Aug 21, 2020. It is now read-only.

Commit

Permalink
add:图数据补充完成
Browse files Browse the repository at this point in the history
  • Loading branch information
CaribouW committed Mar 30, 2020
1 parent 12355c0 commit 2e3340f
Show file tree
Hide file tree
Showing 28 changed files with 504 additions and 17,145 deletions.
2 changes: 1 addition & 1 deletion dockerfiles/docker-compose-local.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ services:
- MONGODB_PASSWORD=mongo
- MONGODB_DATABASE=se
neo4j:
image: neo4j:3.0
image: neo4j:latest
ports:
- "7473:7473"
- "7474:7474"
Expand Down
6,994 changes: 0 additions & 6,994 deletions oasis-data/affiliation_post.txt

This file was deleted.

6,994 changes: 0 additions & 6,994 deletions oasis-data/affiliations.txt

This file was deleted.

121 changes: 0 additions & 121 deletions oasis-data/conference_analyser.py

This file was deleted.

1,492 changes: 0 additions & 1,492 deletions oasis-data/conferences.txt

This file was deleted.

1,492 changes: 0 additions & 1,492 deletions oasis-data/conferences_post.txt

This file was deleted.

Empty file.
18 changes: 18 additions & 0 deletions oasis-data/graph-server/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from flask import Flask

# Flask application object; the route decorators below register their views on it.
# Setup reference: https://blog.csdn.net/qq_33528613/article/details/86602707
app = Flask(__name__)


@app.route('/papers')
def get_graph():
    """
    Serve the paper graph (placeholder endpoint).

    The graph query is not wired in yet. A Flask view must not return
    ``None`` (Flask raises ``TypeError`` and the client receives a 500), so
    an explicit "501 Not Implemented" response is returned until the real
    payload is available.

    :return: ``(body, status)`` tuple, which Flask converts to a response
    """
    # TODO: build the payload from the graph DAO once it is exposed to the app.
    return 'not implemented', 501


@app.route('/')
def hello():
    """Root endpoint: reply with a fixed greeting string."""
    greeting = 'hello'
    return greeting


if __name__ == '__main__':
    # Executed as a script: start the Flask server with its default settings.
    app.run()
Empty file.
46 changes: 46 additions & 0 deletions oasis-data/graph-server/dao/paper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# dao logic for dao mapping
from py2neo import Graph, Record

# Module-level py2neo handle for a hard-coded Neo4j HTTP endpoint; the
# __main__ demo at the bottom of this module runs its queries through it.
graph = Graph("http://116.62.7.102:7474")


def build_nodes(record: 'Record'):
    """
    Build the node payload for one query record.

    :param record: record whose ``'n'`` entry exposes ``items()`` yielding
        the node's property key/value pairs
    :return: plain dict holding those key/value pairs
    """
    node = record['n']
    properties = {}
    for key, value in node.items():
        # Keep the first value seen for a key, never overwrite it.
        if key not in properties:
            properties[key] = value
    return properties


def build_edges(record: 'Record'):
    """
    Build the edge payload for one query record.

    :param record: record whose ``'p'`` entry exposes ``start_node``,
        ``end_node`` and ``types()``
    :return: dict with the ``id`` of the start node, the ``id`` of the end
        node, and the first relationship type reported by ``types()``
    """
    path = record['p']
    source_id = path.start_node['id']
    target_id = path.end_node['id']
    # types() returns a collection; only its first element is reported.
    relation_type = list(path.types())[0]
    return {
        'start_node': source_id,
        'end_node': target_id,
        'type': relation_type,
    }


def query_post_process(node_set, edge_set):
    """
    Wrap query results into the JSON-ready response structure.

    :param node_set: iterable of records, each converted with ``build_nodes``
    :param edge_set: iterable of records, each converted with ``build_edges``
    :return: dict with a ``nodes`` list and an ``edges`` list
    """
    node_payload = [build_nodes(item) for item in node_set]
    edge_payload = [build_edges(item) for item in edge_set]
    return {'nodes': node_payload, 'edges': edge_payload}


if __name__ == '__main__':
    # Ad-hoc smoke run: sample up to 25 publish paths and 25 author nodes,
    # then print the assembled payload.
    publish_paths = graph.run(cypher='MATCH p=()-[r:publish]->() RETURN p LIMIT 25')
    author_nodes = graph.run(cypher='MATCH (n:author) RETURN n LIMIT 25')

    payload = query_post_process(author_nodes, publish_paths)
    print(payload)
20 changes: 20 additions & 0 deletions oasis-data/graph-server/db_connecter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pymongo
from py2neo import Graph


def connect_mongodb(host, port, username="root", password="mongo", database="se"):
    """
    Open an authenticated handle on a MongoDB database.

    Credentials are handed to ``MongoClient`` rather than to
    ``Database.authenticate``: that method is deprecated in PyMongo 3.x and
    removed in PyMongo 4, and the requirements file does not pin a PyMongo
    version. ``authSource`` is set to the target database, matching the
    previous behaviour of authenticating on that database.

    Note that ``MongoClient`` connects lazily, so bad credentials surface on
    the first operation rather than inside this function.

    :param host: MongoDB host name or address
    :param port: MongoDB port
    :param username: user name (defaults to the previously hard-coded value)
    :param password: password (defaults to the previously hard-coded value)
    :param database: database to return and to authenticate against
    :return: the ``pymongo`` database object for ``database``
    """
    mongo_client = pymongo.MongoClient(
        host,
        port,
        username=username,
        password=password,
        authSource=database,
    )
    return mongo_client[database]


def connect_neo4j(host, port):
    """
    Create a py2neo ``Graph`` for the HTTP endpoint at ``host:port``.

    Usage notes: https://blog.csdn.net/sinat_26917383/article/details/79901207

    :param host: Neo4j host; converted with ``str`` before use
    :param port: Neo4j HTTP port; converted with ``str`` before use
    :return: ``Graph`` built for ``http://<host>:<port>``
    """
    uri = "http://%s:%s" % (str(host), str(port))
    return Graph(uri)
106 changes: 106 additions & 0 deletions oasis-data/graph-server/graph_construct.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
from py2neo import Node, Relationship, NodeMatcher

from db_connecter import connect_neo4j, connect_mongodb


def construct_graph(host='localhost', steps=('paper_conference',)):
    """
    Build graph data in Neo4j from the MongoDB collections.

    Each step reads MongoDB documents and writes nodes and relationships:

    * ``paper_author``:       (author)-[:publish]->(paper)
    * ``paper_conference``:   (paper)-[:published_on]->(conference)
    * ``author_affiliation``: (author)-[:work_in]->(affiliation)

    Nodes are looked up by label and ``id`` and only created when missing.
    Relationships are created unconditionally (no existence check), and the
    existing graph is not cleared beforehand.

    :param host: host serving both Neo4j (port 7474) and MongoDB (port 27017)
    :param steps: names of the steps to run, in order; a single name may be
        given as a plain string. Defaults to ``paper_conference`` only.
    :return: None
    :raises ValueError: if ``steps`` contains an unknown step name
    """
    steps = (steps,) if isinstance(steps, str) else tuple(steps)
    known_steps = ('paper_author', 'paper_conference', 'author_affiliation')
    unknown = [name for name in steps if name not in known_steps]
    if unknown:
        # Fail before opening any connection.
        raise ValueError(
            'unknown graph construction step(s): {}'.format(', '.join(map(str, unknown)))
        )

    graph = connect_neo4j(host, 7474)
    db = connect_mongodb(host, 27017)
    coll_author = db['authors']
    coll_paper = db['papers']
    coll_aff = db['affiliations']
    coll_conference = db['conferences']
    matcher = NodeMatcher(graph)

    def _ensure_node(label, node_id, **properties):
        """Return the node with this label and id, creating it if missing."""
        node = matcher.match(label, id=node_id).first()
        if node is None:
            node = Node(label, id=node_id, **properties)
            graph.create(node)
        return node

    def paper_author():
        """Build the paper - author relationships."""
        for paper in coll_paper.find():
            paper_node = _ensure_node('paper', paper['_id'], title=paper['title'])
            for author_name in paper['authors'].split(';'):
                entity = coll_author.find_one({'authorName': author_name})
                if entity is None:
                    # No author document for this name: nothing to link.
                    continue
                author_node = _ensure_node('author', entity['_id'], authorName=author_name)
                graph.create(Relationship(author_node, 'publish', paper_node))

    def paper_conference():
        """Build the paper - conference relationships."""
        for paper in coll_paper.find():
            con_name = paper['conference']
            # The paper node is created even when no conference matches.
            paper_node = _ensure_node('paper', paper['_id'], title=paper['title'])
            entity = coll_conference.find_one({'conferenceName': con_name, 'year': paper['year']})
            if entity is None:
                continue
            con_node = _ensure_node(
                'conference',
                str(entity['_id']),
                conferenceName=con_name,
                year=entity['year'],
            )
            graph.create(Relationship(paper_node, 'published_on', con_node))

    def author_affiliation():
        """Build the author - affiliation relationships."""
        for author in coll_author.find():
            aid, author_name, aff_name = author['_id'], author['authorName'], author['affiliationName']
            # A missing author is created as an 'author' node (this used to
            # create a 'paper' node with the author's name as its title).
            author_node = _ensure_node('author', aid, authorName=author_name)
            entity = coll_aff.find_one({'affiliationName': aff_name})
            if entity is None:
                continue
            aff_node = _ensure_node('affiliation', str(entity['_id']), affiliationName=aff_name)
            graph.create(Relationship(author_node, 'work_in', aff_node))

    builders = {
        'paper_author': paper_author,
        'paper_conference': paper_conference,
        'author_affiliation': author_affiliation,
    }
    for name in steps:
        builders[name]()


if __name__ == '__main__':
    # Executed as a script: run the graph construction with its defaults.
    construct_graph()
Empty file.
59 changes: 59 additions & 0 deletions oasis-data/graph-server/models/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from py2neo import Graph
from py2neo.ogm import GraphObject, Property, RelatedTo, RelatedFrom


# Paper
class Paper(GraphObject):
    """
    OGM mapping for nodes labelled ``paper``.

    NOTE(review): no ``__primarykey__`` is declared — confirm whether
    lookups are meant to hinge on the ``id`` property or on Neo4j's
    internal node id.
    """
    __primarylabel__ = 'paper'

    id = Property()
    title = Property()
    # Incoming (author)-[:publish]->(paper) relationships.
    authors = RelatedFrom('Author', 'publish')
    # Outgoing (paper)-[:published_on]->(conference). The type must match
    # the 'published_on' relationships written by graph_construct.py; the
    # previous 'publish_on' never matched them.
    conference = RelatedTo('Conference', 'published_on')


# Author
class Author(GraphObject):
    """OGM mapping for nodes labelled ``author``."""
    __primarylabel__ = 'author'

    id = Property()
    authorName = Property()
    # Outgoing 'publish' relationships to Paper objects.
    papers = RelatedTo('Paper', 'publish')
    # Outgoing 'work_in' relationships to Affiliation objects.
    affiliation_ = RelatedTo('Affiliation', 'work_in')


# Affiliation
class Affiliation(GraphObject):
    """OGM mapping for nodes labelled ``affiliation``."""
    __primarylabel__ = 'affiliation'

    # py2neo ``Property`` descriptors. The builtin ``property()`` used
    # before has no getter and is not mapped to a node property.
    id = Property()
    affiliationName = Property()
    # Incoming 'work_in' relationships from Author objects.
    authors = RelatedFrom('Author', 'work_in')


# Conference
class Conference(GraphObject):
    """OGM mapping for nodes labelled ``conference``."""
    __primarylabel__ = 'conference'

    # py2neo ``Property`` descriptors. The builtin ``property()`` used
    # before has no getter and is not mapped to a node property.
    id = Property()
    conferenceName = Property()
    # graph_construct.py also stores ``year`` on conference nodes.
    year = Property()
    # Incoming (paper)-[:published_on]->(conference). The type must match
    # the 'published_on' relationships written by graph_construct.py.
    papers = RelatedFrom('Paper', 'published_on')


# Field
class Field(GraphObject):
    """OGM mapping for nodes labelled ``field``."""
    __primarylabel__ = 'field'

    # py2neo ``Property`` descriptors. The builtin ``property()`` used
    # before has no getter and is not mapped to a node property.
    id = Property()
    fieldName = Property()


if __name__ == '__main__':
    # OGM reference: https://py2neo.org/v4/ogm.html#graph-objects
    graph = Graph("http://116.62.7.102:7474")
    # Take the first Paper matched for primary value 69602 and print every
    # author related to it.
    matches = Paper.match(graph, primary_value=69602)
    paper = next(iter(matches))

    for author in paper.authors:
        print(author)
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,6 @@ pandas
sqlalchemy
html5lib
selenium
requests
requests
py2neo
pymongo
Loading

0 comments on commit 2e3340f

Please sign in to comment.