Skip to content

Commit

Permalink
Merge pull request #88 from fani-lab/87-uspt-gender-labels-and-experi…
Browse files Browse the repository at this point in the history
…ments-gabriel-edwin

Added gender mappings code for USPT
  • Loading branch information
Hamedloghmani authored Dec 1, 2023
2 parents 324b943 + d8212b9 commit 739d422
Showing 1 changed file with 44 additions and 7 deletions.
51 changes: 44 additions & 7 deletions src/util/mappingGender/mappingGender.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ def __init__(self, opeNTF_output_dir: str) -> None:
self.memberId_2_i = {}
self.df = None

with open(opeNTF_output_dir, "rb") as f:
self.opeNTF_out = pkl.load(f)
if(opeNTF_output_dir):
with open(opeNTF_output_dir, "rb") as f:
self.opeNTF_out = pkl.load(f)


# Generates the Mapping for Member ID to opeNTF index
Expand Down Expand Up @@ -169,6 +170,31 @@ def findGenderResults_DBLP_v2(self, dblp_json_dir :str):

self.df = pd.DataFrame.from_dict(data, orient="index", columns=["gender"])

def generate_mapping_uspt(self, teams_pkl: str, indexes_pkl: str):
    """
    Builds the mapping from OpeNTF ID (c2i index) to gender value for USPT
    and stores it in self.df as a DataFrame with a single "gender" column.
    Nothing is written to disk here; the result only lives in self.df.
    True: Male, False: Female, Null: null
    Args:
        teams_pkl: location of teams.pkl file for uspt
        indexes_pkl: location of indexes.pkl file for uspt
    Return:
        None
    """
    # NOTE: unpickling can execute arbitrary code; only pass trusted, locally generated files.
    # Each file is closed as soon as it is loaded instead of staying open during the loop.
    with open(teams_pkl, "rb") as teams_file:
        teams = pkl.load(teams_file)
    with open(indexes_pkl, "rb") as indexes_file:
        c2i = pkl.load(indexes_file)["c2i"]

    mappings = {}
    for patent in teams:
        for member in patent.members:
            # f-string instead of "+" so a non-string member.id does not raise TypeError
            ind = c2i[f"{member.id}_{member.name}"]
            # Keep the first gender value seen for an index; later duplicates are ignored
            mappings.setdefault(ind, member.gender)

    self.df = pd.DataFrame.from_dict(mappings, orient="index", columns=["gender"])



Expand Down Expand Up @@ -196,15 +222,26 @@ def importResults(self, directory):
# imdbMapGender.exportResults_toPickle('data/preprocessed/imdb/i2gender.pkl')


dblpMapGender = MappingGender('data/preprocessed/dblp/dblp.v12.json/indexes.pkl')
# dblpMapGender = MappingGender('data/preprocessed/dblp/dblp.v12.json/indexes.pkl')

dblpMapGender.createMemberID_2_i_DBLP()
# dblpMapGender.createMemberID_2_i_DBLP()

dblpMapGender.findGenderResults_DBLP_v2('../dblp_labelledGender_updated.json')
# dblpMapGender.findGenderResults_DBLP_v2('../dblp_labelledGender_updated.json')

dblpMapGender.exportResults_toCSV('data/preprocessed/dblp/i2gender.csv')
# dblpMapGender.exportResults_toCSV('data/preprocessed/dblp/i2gender.csv')

dblpMapGender.exportResults_toPickle('data/preprocessed/dblp/i2gender.pkl')
# dblpMapGender.exportResults_toPickle('data/preprocessed/dblp/i2gender.pkl')



# USPT:
# Build the index -> gender table for the USPT dataset from its preprocessed pickles,
# then export it.

# None is passed because the constructor only opens an OpeNTF output pickle when the
# argument is truthy; the USPT flow reads its inputs in generate_mapping_uspt instead.
uspt_map_gender = MappingGender(None)

# Populates uspt_map_gender.df from teams.pkl and the 'c2i' entry of indexes.pkl.
uspt_map_gender.generate_mapping_uspt(teams_pkl="data/preprocessed/uspt/patent.tsv.filtered.mt75.ts3/teams.pkl",
indexes_pkl="data/preprocessed/uspt/patent.tsv.filtered.mt75.ts3/indexes.pkl")


# Presumably these write the DataFrame built above to the given paths; confirm against the
# exportResults_* helpers, which are defined elsewhere in the file.
uspt_map_gender.exportResults_toCSV("data/preprocessed/uspt/patent.tsv.filtered.mt75.ts3/gender.csv")
uspt_map_gender.exportResults_toPickle("data/preprocessed/uspt/patent.tsv.filtered.mt75.ts3/gender.pkl")

0 comments on commit 739d422

Please sign in to comment.