// copypaste.proto
// This schema is written in the proto3 dialect of Protocol Buffers.
syntax = "proto3";
// Every service and message declared in this file belongs to this package.
package achivx.copypaste;
/**
 * Storage service for text documents that can also look up stored documents
 * similar to a given one.
 */
service Copypaste {
  /**
   * Stores a document.
   *
   * If a document with the same id is already stored, it is overwritten.
   */
  rpc RememberDocument(Document) returns (StoredDocumentResponse);

  /**
   * Deletes a document from the storage.
   */
  rpc ForgetDocument(DocumentRequest) returns (Empty);

  /**
   * Fetches the contents of a stored document.
   */
  rpc GetDocument(DocumentRequest) returns (Document);

  /**
   * Compares the given document against the documents in the storage.
   *
   * Responds with a stream of similar documents. The stream is empty when no
   * stored document is similar enough to the given one.
   */
  rpc CheckDocument(CheckDocumentRequest) returns (stream SimilarDocument);
}
/**
 * A single metadata entry of a document.
 *
 * The metadata of a document is a set of key-value pairs kept together with
 * the document. It is available when document contents are fetched through
 * `GetDocument`. Custom text preprocessors can read it as well, for example
 * to choose a language-specific preprocessing algorithm when the language of
 * the text is stored as a metadata entry.
 */
message DocumentMeta {
  /** Key of the key-value pair. */
  string name = 1;

  /** Value of the key-value pair. */
  string value = 2;
}
/**
 * A text document together with its metadata.
 *
 * Used as the request of `RememberDocument` and the response of `GetDocument`.
 */
message Document {
  /**
   * Unique id of the document.
   *
   * Has to be present and has to be unique.
   */
  string id = 1;

  /**
   * Text of the document.
   *
   * The text may be split up into several strings.
   */
  repeated string text = 2;

  /**
   * Metadata of the document.
   *
   * See `DocumentMeta`.
   */
  repeated DocumentMeta meta = 3;
}
/**
 * Response of `RememberDocument`.
 */
message StoredDocumentResponse {
  /**
   * NOTE(review): presumably the id of the document that was written to the
   * storage -- confirm against the server implementation.
   */
  string id = 1;
}
/**
 * Request of `ForgetDocument` and `GetDocument`.
 */
message DocumentRequest {
  /**
   * NOTE(review): presumably the id of the stored document to remove or to
   * fetch -- confirm against the server implementation.
   */
  string id = 1;
}
/**
 * Message without any fields; serves as the response of `ForgetDocument`.
 */
message Empty {}
/**
 * Request of `CheckDocument`.
 *
 * NOTE(review): field number 1 is not used by this message, while `text` and
 * `meta` carry the same numbers and types as in `Document` -- confirm whether
 * that is intentional and whether number 1 should be marked as reserved.
 */
message CheckDocumentRequest {
  /**
   * Text of the document that is being checked.
   */
  repeated string text = 2;

  /**
   * Metadata of the document that is being checked.
   */
  repeated DocumentMeta meta = 3;

  /**
   * Maximum number of similar documents to return.
   *
   * When the field is omitted, at most one document is returned.
   */
  optional int64 maxSimilar = 4;
}
/**
 * One element of the stream returned by `CheckDocument`.
 */
message SimilarDocument {
  /**
   * Id of the similar document that was found.
   *
   * Call `GetDocument` to access the full content and metadata of that
   * document.
   */
  string id = 1;

  /**
   * Relative similarity of the documents.
   *
   * A number greater than 0.0 and not greater than 1.0.
   */
  float relSimilarity = 2;

  /**
   * Absolute similarity of the documents.
   *
   * An integer: the number of matching elements between the checked document
   * and the similar stored document.
   */
  int64 absSimilarity = 3;
}