forked from leylaso/django-library
-
Notifications
You must be signed in to change notification settings - Fork 0
/
importcsv.py
executable file
·137 lines (116 loc) · 3.37 KB
/
importcsv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import csv
from library.models import *
import re
import os
def clearDB():
    """Run the mysql command-line client through the shell with trash.sql as its input.

    The exit status reported by os.system is discarded; nothing is returned.
    """
    # TODO make sure to purge secret data from here before committing this!
    shellCommand = "mysql -u dira_library -p dira_library < trash.sql"
    os.system(shellCommand)
def csvOpen(csvfile, delimiter="\t", quotechar='"'):
    """Open csvfile in binary mode and return a csv.reader over it.

    csvfile   -- path of the file to read.
    delimiter -- field separator handed to csv.reader (tab by default).
    quotechar -- quote character handed to csv.reader (double quote by default).

    The underlying file handle is not closed by this function.
    """
    handle = open(csvfile, 'rb')
    reader = csv.reader(handle, delimiter=delimiter, quotechar=quotechar)
    return reader
def saveBook(row, language):
    """Placeholder: ignores both arguments and returns an empty string."""
    nothing = ''
    return nothing
def saveCat(category):
    """Return the Category whose title is `category`, creating it when absent.

    If the lookup finds one or more categories, the first result is returned
    as-is. Otherwise a new Category with that title is saved and returned.
    """
    matches = Category.objects.filter(title=category)
    if not matches:
        # Nothing stored under this title yet: create and persist it.
        created = Category(title=category)
        created.save()
        return created
    return matches[0]
def saveAuthor(sur, give = ''):
    """Return the Author matching the given names, creating it when absent.

    sur  -- value matched against / stored in the `surname` field.
    give -- value matched against / stored in the `givenames` field
            (empty string by default).

    If the lookup finds one or more authors, the first result is returned.
    Otherwise a new Author is saved and returned.
    """
    existing = Author.objects.filter(surname=sur, givenames=give)
    if not existing:
        # No author stored with exactly these names: create and persist one.
        newAuthor = Author(surname=sur, givenames=give)
        newAuthor.save()
        return newAuthor
    return existing[0]
def savePub(name):
    """Return the Publisher called `name`, creating it when absent.

    If the lookup finds one or more publishers, the first result is returned.
    Otherwise a new Publisher with that name is saved and returned.
    """
    found = Publisher.objects.filter(name=name)
    if not found:
        # No publisher stored under this name yet: create and persist it.
        publisher = Publisher(name=name)
        publisher.save()
        return publisher
    return found[0]
def printLine(row, ofile = 'output.csv'):
    """Append row to ofile as a single tab-separated line.

    row   -- sequence of strings; the items are joined with tab characters.
    ofile -- path of the file to append to ('output.csv' by default).
    """
    # The previous version referenced `of.close` without calling it, so the
    # handle was never explicitly closed. The with-block closes the file even
    # if write() raises.
    with open(ofile, 'a') as out:
        out.write("\t".join(row) + "\n")
def process(csv, fields = ['author', 'title', 'year', 'publisher'], language='eng', category='Anarchism'):
    """Import the rows of csv as Book records and report how many were saved.

    csv      -- iterable of rows, each a sequence of strings (for example the
                reader returned by csvOpen). Note that this parameter name
                hides the csv module inside the function.
    fields   -- column names, in the order the columns appear in each row.
                The 'author' and 'publisher' columns are read from every valid
                row, and 'title' and 'year' are read when building the Book.
                An optional 'category' column overrides `category` for its row.
    language -- value passed as the Book's `language`.
    category -- title of the Category used for rows that carry no 'category'
                column of their own.

    Rows whose fields are all empty are skipped. Rows rejected by validRow,
    and rows whose Book cannot be created or saved, are appended to the
    default output file through printLine.

    Returns the string "<n> rows imported\n", where n counts only the rows
    whose Book was saved successfully.
    """
    # First save the category if necessary
    defaultCat = saveCat(category)
    imported = 0
    for row in csv:
        if rowLength(row) == 0:
            continue
        if not validRow(row, fields):
            printLine(row)
            continue

        # Map column names onto this row's values, echoing each pair.
        # A parenthesised single-argument print produces the same output as
        # the print statement.
        record = {}
        for name, value in zip(fields, row):
            record[name] = value
            print(name + " " + value + " .")
        print("\n")

        # Save the category if available
        if 'category' in record:
            rowCat = saveCat(record['category'])
        else:
            rowCat = defaultCat

        # Save the author; text before the first comma is the surname and the
        # text after it (when present) is the given names.
        authNames = record['author'].split(',')
        if len(authNames) > 1:
            auth = saveAuthor(authNames[0].strip(), authNames[1].strip())
        else:
            auth = saveAuthor(authNames[0].strip())

        # Save the publisher. Reset per row: previously an empty publisher
        # silently reused the publisher left over from an earlier row (or hit
        # a NameError on the first row).
        # NOTE(review): assumes Book.publisher may be empty; if the model
        # rejects None the row is logged through printLine below - confirm.
        pub = None
        if len(record['publisher']) > 0:
            pub = savePub(record['publisher'])

        try:
            newBook = Book(title=record['title'], year=record['year'], category=rowCat, publisher=pub, language=language)
            newBook.save()
            newBook.author.add(auth)
            newBook.save()
        except Exception:
            # Was a bare `except:`, which also swallowed KeyboardInterrupt and
            # SystemExit. Failed rows are still logged, not raised.
            printLine(row)
        else:
            # Count only rows that were actually stored, so the total matches
            # the "rows imported" wording of the returned message.
            imported += 1
    return str(imported) + " rows imported\n"
def validInfo(csv, fields = ['author', 'title', 'year', 'publisher', 'genre']):
    """Classify each row of csv as empty, valid or invalid and summarise the counts.

    csv    -- iterable of rows, each a sequence of strings.
    fields -- column names handed to validRow for every non-empty row.

    A row is "empty" when rowLength reports 0 for it; otherwise validRow
    decides between "valid" and "invalid".

    Returns three newline-terminated lines: the valid, invalid and empty
    row counts, in that order.
    """
    tally = {'valid': 0, 'invalid': 0, 'empty': 0}
    for row in csv:
        if rowLength(row) == 0:
            bucket = 'empty'
        elif validRow(row, fields):
            bucket = 'valid'
        else:
            bucket = 'invalid'
        tally[bucket] += 1
    report = [
        str(tally['valid']) + " valid rows\n",
        str(tally['invalid']) + " invalid rows\n",
        str(tally['empty']) + " empty rows\n",
    ]
    return "".join(report)
def rowLength(row):
    """Return the combined length of all fields in row (0 for an empty row)."""
    return sum(len(cell) for cell in row)
def validRow(row, fields = ['author', 'title', 'year', 'publisher', 'genre']):
    """Return True when every named field of row passes its validator.

    row    -- sequence of strings, one per column.
    fields -- column names, in column order. For each name the module-level
              function `<name>Valid` is called with the row value at the
              same position.

    Returns True if all validators return a true value (or fields is empty)
    and False otherwise. A row with fewer columns than fields is reported as
    invalid instead of raising IndexError.

    Raises NameError if a field name has no `<name>Valid` function.
    """
    # A row that is missing columns cannot satisfy every field.
    if len(row) < len(fields):
        return False

    # Resolve validators by name. The previous version assembled source text
    # from the field names and ran it with `exec`, which would execute any
    # code embedded in a field name and is a syntax error on Python 3.
    # All names are resolved before any validator runs, so an unknown field
    # is always reported.
    validators = []
    for name in fields:
        validatorName = name + 'Valid'
        validator = globals().get(validatorName)
        if validator is None:
            raise NameError("name '" + validatorName + "' is not defined")
        validators.append(validator)

    for position, validator in enumerate(validators):
        if not validator(row[position]):
            return False
    return True
def authorValid(text):
    """Match text against the author-name pattern.

    The pattern requires comma-free text, then a run of commas and/or
    spaces, then more comma-free text up to the end of the string (for
    example "Doe, John" or "John Doe").

    Returns the re match object on success and None otherwise.
    """
    authorPattern = '^[^, ]*[^,]+[, ]+[^,]+$'
    return re.match(authorPattern, text)
def titleValid(text):
    """Return True when len(text) is at most 256."""
    longest = 256
    return len(text) <= longest
def yearValid(text):
    """Return True when len(text) is at most 4; the content is not inspected."""
    longest = 4
    return len(text) <= longest
def publisherValid(text):
    """Return True when len(text) is at most 256."""
    longest = 256
    return len(text) <= longest
def genreValid(text):
    """Return True when len(text) is at most 256."""
    longest = 256
    return len(text) <= longest
def categoryValid(text):
    """Return True when len(text) is at most 256."""
    longest = 256
    return len(text) <= longest