-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAbbyyOnlineSdk.py
102 lines (86 loc) · 3.16 KB
/
AbbyyOnlineSdk.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/python
# Usage: process.py <input file> <output file> [-language <Language>] [-pdf|-txt|-rtf|-docx|-xml]
import argparse
import base64
import getopt
import MultipartPostHandler
import os
import re
import sys
import time
import urllib2
import urllib
import xml.dom.minidom
class ProcessingSettings:
    """Recognition options sent to the server along with an image."""

    # Recognition language; passed as the "language" query parameter.
    Language = "English"
    # Export format of the result; passed as the "exportFormat" query parameter.
    OutputFormat = "txt"
class Task:
    """Server-side processing task as described by a server response."""

    # Last known status string reported by the server.
    Status = "Unknown"
    # Task identifier assigned by the server; None until a response is decoded.
    Id = None
    # Location of the result; None unless one has been recorded for the task.
    DownloadUrl = None

    def IsActive(self):
        """Return True while the task is still queued or being processed."""
        return self.Status in ("InProgress", "Queued")
class AbbyyOnlineSdk:
    """Small HTTP client for the OCR service located at ``ServerUrl``.

    Typical flow: ``ProcessImage`` uploads a file and returns a ``Task``;
    ``GetTaskStatus`` refreshes that task; ``DownloadResult`` saves the
    result of the task to disk.  Every request carries an HTTP Basic
    ``Authorization`` header built from ``ApplicationId`` and ``Password``.
    """

    # NOTE(review): plain http:// means the Basic credentials travel
    # unencrypted -- confirm whether an https:// endpoint can be used.
    ServerUrl = "http://cloud.ocrsdk.com/"
    # To create an application and obtain a password,
    # register at http://cloud.ocrsdk.com/Account/Register
    # More info on getting your application id and password at
    # http://ocrsdk.com/documentation/faq/#faq3
    # NOTE(review): credentials are hard-coded in source, and Password starts
    # with a space that looks like a paste artefact -- verify and move them
    # out of the repository.
    ApplicationId = "MyTestApp.txt"
    Password = " SSGgSzbrcRywp2HMje4DGjgF"
    # Optional extra urllib2 handler (e.g. a proxy handler) placed first in
    # the opener's handler chain; None means no proxy handler is installed.
    Proxy = None
    # Passed as ``debuglevel`` to urllib2.HTTPHandler.
    enableDebugging = 0

    def ProcessImage(self, filePath, settings):
        """Upload the file at *filePath* for recognition.

        *settings* must expose ``Language`` and ``OutputFormat`` attributes.

        Returns the ``Task`` decoded from the server response, or ``None``
        when the response body contains an ``<Error>`` element.
        """
        urlParams = urllib.urlencode({
            "language": settings.Language,
            "exportFormat": settings.OutputFormat,
        })
        requestUrl = self.ServerUrl + "processImage?" + urlParams
        request = urllib2.Request(requestUrl, None, self.buildAuthInfo())

        # Keep the image open only for the duration of the upload so the
        # file handle is released even if the request fails.
        with open(filePath, "rb") as imageFile:
            response = self._readResponse(request, {"file": imageFile})

        if '<Error>' in response:
            return None
        # Any response other than HTTP 200 means error - in this case exception will be thrown

        # parse response xml and extract task ID
        return self.DecodeResponse(response)

    def GetTaskStatus(self, task):
        """Query the server for *task* and return a fresh ``Task`` object."""
        urlParams = urllib.urlencode({"taskId": task.Id})
        statusUrl = self.ServerUrl + "getTaskStatus?" + urlParams
        request = urllib2.Request(statusUrl, None, self.buildAuthInfo())
        return self.DecodeResponse(self._readResponse(request))

    def DownloadResult(self, task, outputPath):
        """Fetch the result of *task* and write it to *outputPath*."""
        getResultParams = urllib.urlencode({"taskId": task.Id})
        getResultUrl = self.ServerUrl + "getResult?" + getResultParams
        request = urllib2.Request(getResultUrl, None, self.buildAuthInfo())

        # Download first so a failed request does not truncate an existing file.
        fileResponse = self._readResponse(request)
        with open(outputPath, "wb") as resultFile:
            resultFile.write(fileResponse)

    def DecodeResponse(self, xmlResponse):
        """Decode xml response of the server. Return Task object.

        Raises ``IndexError`` when the document has no ``task`` element.
        """
        dom = xml.dom.minidom.parseString(xmlResponse)
        taskNode = dom.getElementsByTagName("task")[0]
        task = Task()
        task.Id = taskNode.getAttribute("id")
        task.Status = taskNode.getAttribute("status")
        if task.Status == "Completed":
            task.DownloadUrl = taskNode.getAttribute("resultUrl")
        return task

    def buildAuthInfo(self):
        """Return the request headers carrying HTTP Basic credentials."""
        credentials = "%s:%s" % (self.ApplicationId, self.Password)
        # b64encode needs bytes; encode only when the text is not already a
        # byte string so plain Python 2 ``str`` values are passed unchanged.
        if not isinstance(credentials, bytes):
            credentials = credentials.encode("utf-8")
        token = base64.b64encode(credentials)
        if not isinstance(token, str):
            token = token.decode("ascii")
        return {"Authorization": "Basic %s" % token}

    def getOpener(self):
        """Build a new opener, store it on ``self.opener`` and return it."""
        handlers = [
            MultipartPostHandler.MultipartPostHandler,
            urllib2.HTTPHandler(debuglevel=self.enableDebugging),
        ]
        if self.Proxy is not None:
            # The proxy handler goes first in the handler list.
            handlers.insert(0, self.Proxy)
        self.opener = urllib2.build_opener(*handlers)
        return self.opener

    def _readResponse(self, request, bodyParams=None):
        """Send *request* (with optional POST data) and return the full body.

        The response object is always closed, including when reading fails.
        """
        response = self.getOpener().open(request, bodyParams)
        try:
            return response.read()
        finally:
            response.close()