-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcategories.py
57 lines (39 loc) · 1.43 KB
/
categories.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 17 15:12:48 2019
@author: Lukas
"""
#import pandas as pd
#import re
#import numpy as np
import json
def find_cat_labels(json_path):
    """Build a dictionary mapping category ids to category titles.

    The JSON file at *json_path* must contain a top-level ``"items"`` list
    whose entries each carry an ``"id"`` and a ``"snippet"`` object with a
    ``"title"``.

    Args:
        json_path: Path of the category JSON file to read.

    Returns:
        dict: ``{item["id"]: item["snippet"]["title"]}`` for every item, in
        file order. If an id occurs more than once, the last title wins.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If ``"items"``, ``"id"``, ``"snippet"`` or ``"title"`` is
            missing.
    """
    # Decode explicitly as UTF-8 (the standard JSON encoding) so the result
    # does not depend on the platform's default locale encoding.
    with open(json_path, encoding="utf-8") as json_file:
        json_data = json.load(json_file)

    return {
        item["id"]: item["snippet"]["title"]
        for item in json_data["items"]
    }
#print(find_cat_labels('Datasets/CA_category_id.json'))
#Categories we want to use: 10,15,20,22,26,30
# The disabled check below gives an error, but its output shows that all
# category labels are the same across the compared files, except that nothing
# is known for US, which was checked manually due to lack of time.
# NOTE(review): the snippet is kept as a bare string literal, so it is never
# executed. If revived, it needs `glob` (not imported in the visible part of
# this file), and `all_files[f+1]` runs past the end of the list on the last
# iteration -- presumably the error mentioned above; confirm before reuse.
"""
categories = ["CA", "DE", "FR", "GB", "IN", "JP", "KR", "MX", "RU", "US"]
all_files = glob.glob('Datasets/**_category_id.json')
for f in range(len(all_files)):
with open(all_files[f]) as json_file:
json_data = json.load(json_file)
with open(all_files[f+1]) as json_file2:
json_data2 = json.load(json_file2)
for i in range(len(json_data["items"])):
cat1 = json_data["items"][i]["snippet"]["title"]
cat2 = json_data2["items"][i]["snippet"]["title"]
print(f)
if cat1 != cat2:
print(cat1, cat2)
"""