-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathfindRepinDesc.py
executable file
·42 lines (34 loc) · 1.49 KB
/
findRepinDesc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/python
import os
import re
import csv
import logging
import gzip
import re
# Logging setup: progress and failures are appended to repinDesc.log.
logging.basicConfig(
    filename="repinDesc.log",
    filemode="a",
    level=logging.INFO,
    format="[ %(asctime)s ] %(levelname)s : %(message)s",
)

# Root directory scanned for profiles; each profile is expected to contain a
# "boards" directory, and each board a ";"-delimited "timeline" file whose
# first column names a gzip-compressed pin page stored in the same board
# directory.
PROFILES_DIR = "./profiles"

# Patterns are compiled once instead of on every pin.  The captures are
# non-greedy so that a line holding several `data-app="">` tags yields only
# the content of the tag being matched, not everything up to the last tag.
DESC_RE = re.compile(
    r'<meta property="og:description" name="og:description" '
    r'content="(.*?)" data-app="">'
)
LIKES_RE = re.compile(
    r'<meta property="pinterestapp:likes" name="pinterestapp:likes" '
    r'content="(.*?)" data-app="">'
)
REPINS_RE = re.compile(
    r'<meta property="pinterestapp:repins" name="pinterestapp:repins" '
    r'content="(.*?)" data-app="">'
)
# Literal marker whose presence in a pin page flags the pin as a repin.
REPIN_MARKER = '<h3 class="title">Repinned from</h3>'

# The output file is held in a `with` block so it is flushed and closed even
# if the scan is interrupted.
with open("repinDesc.txt", "w") as saida:
    # Header row of the ";"-separated output.
    saida.write("pinId;nLikes;nRepins;isRepin;desc\n")

    for arq in os.listdir(PROFILES_DIR):
        logging.info("[%s]: iniciando", arq)  # "starting"
        boards_dir = os.path.join(PROFILES_DIR, arq, "boards")

        try:
            boards = os.listdir(boards_dir)
        except OSError:
            # The boards directory is missing or unreadable
            # ("nao existe mais" = "no longer exists"); skip this profile.
            logging.error("[%s]: nao existe mais", arq, exc_info=True)
            continue

        for board in boards:
            board_dir = os.path.join(boards_dir, board)
            # A failure anywhere inside a board (missing timeline, missing or
            # corrupt pin page, missing meta tag) abandons the rest of that
            # board, as before.  Rows already written for it are kept.
            try:
                with open(os.path.join(board_dir, "timeline"), "r") as photos:
                    for photo in csv.reader(photos, delimiter=";"):
                        if not photo:
                            # Blank line in the timeline: nothing to look up.
                            continue

                        pin_path = os.path.join(board_dir, photo[0])
                        with gzip.open(pin_path, "rb") as pin_file:
                            pin_page = pin_file.read()
                        # gzip yields bytes on Python 3; the patterns above
                        # are text patterns, so decode before matching.
                        if not isinstance(pin_page, str):
                            pin_page = pin_page.decode("utf-8", "replace")

                        # [0] raises IndexError when a tag is absent, which
                        # is reported by the handler below.
                        desc = DESC_RE.findall(pin_page)[0].strip()
                        likes = LIKES_RE.findall(pin_page)[0].strip()
                        repins = REPINS_RE.findall(pin_page)[0].strip()
                        repin = 1 if REPIN_MARKER in pin_page else 0

                        saida.write(
                            ";".join(
                                [photo[0], likes, repins, str(repin), desc]
                            )
                            + "\n"
                        )
            except Exception:
                # Unlike a bare `except:`, this lets KeyboardInterrupt and
                # SystemExit propagate; the traceback records the real cause.
                logging.error(
                    "[%s]: nao tem o seguinte board attribute: %s",
                    arq,
                    board,
                    exc_info=True,
                )