-
Notifications
You must be signed in to change notification settings - Fork 6
/
stream.py
126 lines (103 loc) · 4.13 KB
/
stream.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# GNU Lesser General Public License v3.0 only
# Copyright (C) 2020 Artefact
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 3 of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from datetime import datetime
import time
import io
class Stream(object):
    """
    Named wrapper around an iterable of records.

    A stream can be iterated directly (raw records), read record by record
    through ``readlines`` (records normalised by an encode/decode round trip)
    or exposed as a read-only binary file object through ``as_file``.

    Subclasses provide ``encode_record`` / ``decode_record``; the base class
    raises ``NotImplementedError`` for both.
    """

    _name = None
    _source_generator = None
    # Appended to the stream name by the ``name`` property; None means no
    # extension is appended.
    extension = None
    mime_type = "application/octet-stream"

    def __init__(self, name, source_generator):
        """
        Build a stream called ``name`` (a timestamp suffix is appended, see
        ``create_stream_name``) over ``source_generator``.

        ``source_generator`` is an iterable yielding records (dicts, according
        to the original author's note).
        """
        self._name = self.create_stream_name(name)
        self._source_generator = source_generator
        # A single iterator is created up front and shared by __iter__ and
        # as_file, so a record consumed through one is not seen by the other.
        self._iterator = iter(source_generator)

    def __len__(self):
        """
        Return the length reported by the underlying source.

        Delegates to the source's own ``__len__``; a source that does not
        define it (for instance a plain generator) raises ``AttributeError``.
        """
        return self._source_generator.__len__()

    def __iter__(self):
        """
        The raw stream object can also be iterated.
        You'll get the raw elements yielded by the generator.
        """
        return self._iterator

    def as_file(self) -> io.BufferedReader:
        """
        Return a buffered, read-only binary file object over the remaining
        records, each one encoded with ``encode_record_as_bytes``.
        """
        return self._iterable_to_stream(self._iterator, self.encode_record_as_bytes)

    def readlines(self):
        """
        Yield each element of the generator, one by one
        (ex: line by line for a file).

        Every record goes through ``encode_record`` then ``decode_record``
        before being yielded.
        """
        for record in self:
            yield self.decode_record(self.encode_record(record))

    @classmethod
    def create_from_stream(cls, source_stream):
        """
        Return ``source_stream`` itself when it is already an instance of
        ``cls``; otherwise build a new ``cls`` from its ``name`` and the
        records produced by its ``readlines()``.
        """
        if isinstance(source_stream, cls):
            return source_stream
        return cls(source_stream.name, source_stream.readlines())

    @classmethod
    def encode_record_as_bytes(cls, record) -> bytes:
        """Encode ``record`` as text, append a newline and return UTF-8 bytes."""
        return (cls.encode_record(record) + "\n").encode("utf-8")

    @classmethod
    def encode_record(cls, record) -> str:
        """Return the text form of ``record``. Must be provided by subclasses."""
        raise NotImplementedError

    @classmethod
    def decode_record(cls, record):
        """Rebuild a record from its text form. Must be provided by subclasses."""
        raise NotImplementedError

    @staticmethod
    def create_stream_name(name):
        """
        Return ``name`` suffixed with the current local time formatted as
        ``YYYY-MM-DD-HH-MM-SS``.
        """
        ts = time.time()
        ts_as_string = datetime.fromtimestamp(ts).strftime("%Y-%m-%d-%H-%M-%S")
        return f"{name}_{ts_as_string}"

    @property
    def name(self):
        """Stream name followed by ``.extension`` when an extension is set."""
        return ".".join(filter(None, [self._name, self.extension]))

    @staticmethod
    def _iterable_to_stream(iterable, encode, buffer_size=io.DEFAULT_BUFFER_SIZE):
        """
        Credit goes to 'Mechanical snail'
        at https://stackoverflow.com/questions/6657820/python-convert-an-iterable-to-a-stream

        Lets you use an iterable (e.g. a generator) as a read-only input
        stream. Each element is turned into a bytestring by ``encode``.
        The stream implements Python 3's newer I/O API (available in
        Python 2's io module). For efficiency, the stream is buffered.

        :param iterable: iterable or iterator providing the elements
        :param encode: callable turning one element into a bytes-like chunk
        :param buffer_size: buffer size handed to ``io.BufferedReader``
        :return: an ``io.BufferedReader`` wrapping the raw iterator stream
        """
        # iter() is a no-op on an iterator and lets plain iterables
        # (lists, tuples, ...) be used as well.
        iterator = iter(iterable)

        class IterStream(io.RawIOBase):
            def __init__(self):
                super().__init__()
                self.leftover = None
                self.count = 0

            def readable(self):
                return True

            def readinto(self, b):
                chunk_length = len(b)  # We're supposed to return at most this much
                chunk = self.leftover
                try:
                    # Skip empty chunks: returning 0 here would be read as EOF
                    # by the buffered layer and silently truncate the stream.
                    while not chunk:
                        chunk = encode(next(iterator))
                except StopIteration:
                    return 0  # indicate EOF
                output, self.leftover = chunk[:chunk_length], chunk[chunk_length:]
                b[: len(output)] = output
                self.count += len(output)
                return len(output)

            # tell should be implemented for GCS
            def tell(self):
                # Number of bytes handed out by readinto so far.
                return self.count

        return io.BufferedReader(IterStream(), buffer_size=buffer_size)