Skip to content

Commit

Permalink
Add keywords in MeetingHansard #18
Browse files: browse the repository at this point in the history
  • Loading branch information
howawong committed Mar 25, 2018
1 parent 257ba3b commit 9cac091
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 2 deletions.
21 changes: 19 additions & 2 deletions api_server/legco/management/commands/load_hansard_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from django.db import IntegrityError
from django.core.management.base import BaseCommand, CommandError
from django.core.exceptions import ObjectDoesNotExist
from legco.models import Keyword
from legco.models import Meeting, Vote, Motion, Individual, IndividualVote, VoteSummary, Party, MeetingHansard, MeetingSpeech, MeetingPersonel
from dateutil.parser import *
import os
Expand All @@ -17,6 +18,9 @@
import sys
import json
from datetime import date, datetime
from collections import Counter
from textrank4zh import TextRank4Keyword, TextRank4Sentence


class Command(BaseCommand):
help = 'Load hansard JSON into database'
Expand All @@ -31,7 +35,8 @@ def delete_existing_hansard(self, url):
hansard.speeches,
hansard.members_absent,
hansard.public_officers,
hansard.clerks]
hansard.clerks,
hansard.keywords]
for q in queries:
for o in q.all():
o.delete()
Expand Down Expand Up @@ -110,7 +115,19 @@ def handle(self, *args, **options):
hansard.public_officers.add(p)
for c in clerks:
hansard.clerks.add(c)

all_s = ""
for s in speeches:
all_s += s.text_ch + "\n"
hansard.speeches.add(s)
print("New hansard ID=%d" % hansard.id)
#print("Calculating keyword")
#tr4w = TextRank4Keyword(allow_speech_tags=["nr", "ns", "nz", "nt"])
#tr4w.analyze(text=all_s, lower=True, window=3)
#for item in tr4w.get_keywords(20, word_min_len=3):
# keyword = item.word
# m, created = Keyword.objects.get_or_create(keyword = keyword)
# m.keyword = keyword
# print(keyword)
# hansard.keywords.add(m)
#print("New hansard ID=%d" % hansard.id)
hansard.save()
1 change: 1 addition & 0 deletions api_server/legco/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ class MeetingHansard(models.Model):
key = models.CharField(max_length=128, unique=True)
source_url = models.CharField(max_length=2048)
speeches = models.ManyToManyField(MeetingSpeech, related_name='hansard')
keywords = models.ManyToManyField(Keyword)
members_present = models.ManyToManyField(MeetingPersonel, related_name='present')
members_absent = models.ManyToManyField(MeetingPersonel, related_name='absent')
public_officers = models.ManyToManyField(MeetingPersonel, related_name='officers')
Expand Down
1 change: 1 addition & 0 deletions api_server/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ python-dateutil==2.6.1
w3lib==1.19.0
jieba==0.39
lxml==4.1.1
textrank4zh==0.3

0 comments on commit 9cac091

Please sign in to comment.