Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: get page and chunk size from env vars #1439

Merged
merged 3 commits into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions tableauserverclient/config.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,25 @@
# TODO: check for env variables, else set default values
import os

# File extensions (without the leading dot) on the allow-list.
# NOTE(review): imported by datasources_endpoint; presumably validated when publishing - confirm.
ALLOWED_FILE_EXTENSIONS = ["tds", "tdsx", "tde", "hyper", "parquet"]

# Number of bytes in one megabyte; multiply a *_MB setting by this to get a byte count.
BYTES_PER_MB = 1024 * 1024

# Size, in MB, of each chunk read when a file is uploaded with the chunking method
# (used for files at or above FILESIZE_LIMIT_MB). The standard chunk size is 5MB;
# this value is ten times that, i.e. 50MB.
CHUNK_SIZE_MB = 5 * 10 # 5MB felt too slow, upped it to 50

# Sleep interval, in seconds.
# NOTE(review): not referenced in the visible code - confirm which wait loop it paces.
DELAY_SLEEP_SECONDS = 0.1

# The maximum size of a file that can be published in a single request is 64MB;
# files at or above this size are published with the chunking method instead.
FILESIZE_LIMIT_MB = 64


def _positive_int_from_env(name, default):
    """Return the positive integer held in environment variable *name*.

    An unset or blank variable yields *default*.

    Raises:
        ValueError: if the variable is set to something that is not an
            integer, or to an integer that is zero or negative. The message
            names the offending variable.
    """
    raw = os.getenv(name)
    # A variable exported as an empty string is treated the same as unset.
    if raw is None or not raw.strip():
        return default
    try:
        value = int(raw)
    except ValueError:
        raise ValueError(
            "Environment variable {} must be a positive integer, got {!r}".format(name, raw)
        ) from None
    # Zero would make chunked reads return nothing and a negative size would
    # read the whole file at once, so reject both instead of misbehaving later.
    if value <= 0:
        raise ValueError(
            "Environment variable {} must be a positive integer, got {!r}".format(name, raw)
        )
    return value


class Config:
    """Settings that can be overridden through environment variables.

    Each property re-reads the environment on every access, so a change to
    the variable takes effect without re-importing the module.
    """

    # Files too large for a single request are uploaded in chunks of this many MB.
    # The standard chunk size is 5MB; the default here is 50MB.
    @property
    def CHUNK_SIZE_MB(self):
        """Upload chunk size in MB, from TSC_CHUNK_SIZE_MB (default 50)."""
        return _positive_int_from_env("TSC_CHUNK_SIZE_MB", 5 * 10)  # 5MB felt too slow, upped it to 50

    # Default page size
    @property
    def PAGE_SIZE(self):
        """Default number of items per page, from TSC_PAGE_SIZE (default 100)."""
        return _positive_int_from_env("TSC_PAGE_SIZE", 100)


# Shared module-level Config instance; other modules import this name to read the settings.
config = Config()
4 changes: 2 additions & 2 deletions tableauserverclient/server/endpoint/datasources_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from tableauserverclient.server.endpoint.permissions_endpoint import _PermissionsEndpoint
from tableauserverclient.server.endpoint.resource_tagger import _ResourceTagger

from tableauserverclient.config import ALLOWED_FILE_EXTENSIONS, FILESIZE_LIMIT_MB, BYTES_PER_MB, CHUNK_SIZE_MB
from tableauserverclient.config import ALLOWED_FILE_EXTENSIONS, FILESIZE_LIMIT_MB, BYTES_PER_MB, config
from tableauserverclient.filesys_helpers import (
make_download_path,
get_file_type,
Expand Down Expand Up @@ -272,7 +272,7 @@ def publish(
if file_size >= FILESIZE_LIMIT_MB * BYTES_PER_MB:
logger.info(
"Publishing {} to server with chunking method (datasource over {}MB, chunk size {}MB)".format(
filename, FILESIZE_LIMIT_MB, CHUNK_SIZE_MB
filename, FILESIZE_LIMIT_MB, config.CHUNK_SIZE_MB
)
)
upload_session_id = self.parent_srv.fileuploads.upload(file)
Expand Down
4 changes: 2 additions & 2 deletions tableauserverclient/server/endpoint/fileuploads_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from tableauserverclient import datetime_helpers as datetime
from tableauserverclient.helpers.logging import logger

from tableauserverclient.config import BYTES_PER_MB, CHUNK_SIZE_MB
from tableauserverclient.config import BYTES_PER_MB, config
from tableauserverclient.models import FileuploadItem
from tableauserverclient.server import RequestFactory

Expand Down Expand Up @@ -41,7 +41,7 @@ def _read_chunks(self, file):

try:
while True:
chunked_content = file_content.read(CHUNK_SIZE_MB * BYTES_PER_MB)
chunked_content = file_content.read(config.CHUNK_SIZE_MB * BYTES_PER_MB)
if not chunked_content:
break
yield chunked_content
Expand Down
3 changes: 2 additions & 1 deletion tableauserverclient/server/query.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from collections.abc import Sized
from itertools import count
from typing import Iterable, Iterator, List, Optional, Protocol, Tuple, TYPE_CHECKING, TypeVar, overload
from tableauserverclient.config import config
from tableauserverclient.models.pagination_item import PaginationItem
from tableauserverclient.server.filter import Filter
from tableauserverclient.server.request_options import RequestOptions
Expand Down Expand Up @@ -35,7 +36,7 @@ def to_camel_case(word: str) -> str:
class QuerySet(Iterable[T], Sized):
def __init__(self, model: "QuerysetEndpoint[T]", page_size: Optional[int] = None) -> None:
self.model = model
self.request_options = RequestOptions(pagesize=page_size or 100)
self.request_options = RequestOptions(pagesize=page_size or config.PAGE_SIZE)
self._result_cache: List[T] = []
self._pagination_item = PaginationItem()

Expand Down
5 changes: 3 additions & 2 deletions tableauserverclient/server/request_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from typing_extensions import Self

from tableauserverclient.config import config
from tableauserverclient.models.property_decorators import property_is_int
import logging

Expand Down Expand Up @@ -115,9 +116,9 @@ class Direction:
Desc = "desc"
Asc = "asc"

def __init__(self, pagenumber=1, pagesize=100):
def __init__(self, pagenumber=1, pagesize=None):
self.pagenumber = pagenumber
self.pagesize = pagesize
self.pagesize = pagesize or config.PAGE_SIZE
self.sort = set()
self.filter = set()

Expand Down
25 changes: 25 additions & 0 deletions test/test_fileuploads.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import contextlib
import io
import os
import unittest

import requests_mock

from tableauserverclient.config import BYTES_PER_MB, config
from tableauserverclient.server import Server
from ._utils import asset

Expand All @@ -11,6 +14,17 @@
FILEUPLOAD_APPEND = os.path.join(TEST_ASSET_DIR, "fileupload_append.xml")


@contextlib.contextmanager
def set_env(**environ):
    """Temporarily apply *environ* to ``os.environ`` for the duration of a ``with`` block.

    The complete environment is snapshotted on entry and restored on exit,
    so variables added, changed or removed inside the block do not leak out.
    """
    old_environ = dict(os.environ)
    try:
        # Applied inside the try so that a failure part-way through the update
        # (e.g. a non-string value) still rolls back the keys already set.
        os.environ.update(environ)
        yield
    finally:
        os.environ.clear()
        os.environ.update(old_environ)


class FileuploadsTests(unittest.TestCase):
def setUp(self):
self.server = Server("http://test", False)
Expand Down Expand Up @@ -62,3 +76,14 @@ def test_upload_chunks_file_object(self):
actual = self.server.fileuploads.upload(file_content)

self.assertEqual(upload_id, actual)

def test_upload_chunks_config(self):
    """The chunk reader honours the TSC_CHUNK_SIZE_MB override."""
    with set_env(TSC_CHUNK_SIZE_MB="1"):
        # Build the payload inside the override so it is one byte larger than
        # a single 1MB chunk, rather than larger than the 50MB default.
        chunk_bytes = config.CHUNK_SIZE_MB * BYTES_PER_MB
        self.assertEqual(chunk_bytes, BYTES_PER_MB)
        data = io.BytesIO(b"1" * (chunk_bytes + 1))

        chunker = self.server.fileuploads._read_chunks(data)
        chunk = next(chunker)

        # The first chunk is exactly one configured chunk, not the whole payload.
        self.assertEqual(len(chunk), chunk_bytes)
        self.assertLess(len(chunk), len(data.getvalue()))
25 changes: 25 additions & 0 deletions test/test_pager.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import contextlib
import os
import unittest

import requests_mock

import tableauserverclient as TSC
from tableauserverclient.config import config

TEST_ASSET_DIR = os.path.join(os.path.dirname(__file__), "assets")

Expand All @@ -12,6 +14,17 @@
GET_XML_PAGE3 = os.path.join(TEST_ASSET_DIR, "workbook_get_page_3.xml")


@contextlib.contextmanager
def set_env(**environ):
    """Temporarily apply *environ* to ``os.environ`` for the duration of a ``with`` block.

    The complete environment is snapshotted on entry and restored on exit,
    so variables added, changed or removed inside the block do not leak out.
    """
    old_environ = dict(os.environ)
    try:
        # Applied inside the try so that a failure part-way through the update
        # (e.g. a non-string value) still rolls back the keys already set.
        os.environ.update(environ)
        yield
    finally:
        os.environ.clear()
        os.environ.update(old_environ)


class PagerTests(unittest.TestCase):
def setUp(self):
self.server = TSC.Server("http://test", False)
Expand Down Expand Up @@ -88,3 +101,15 @@ def test_pager_with_options(self):
# Should have the last workbook
wb3 = workbooks.pop()
self.assertEqual(wb3.name, "Page3Workbook")

def test_pager_with_env_var(self):
    """A Pager created without explicit options takes its page size from TSC_PAGE_SIZE."""
    with set_env(TSC_PAGE_SIZE="1000"):
        self.assertEqual(config.PAGE_SIZE, 1000)
        loop = TSC.Pager(self.server.workbooks)
        self.assertEqual(loop._options.pagesize, 1000)

def test_queryset_with_env_var(self):
    """A QuerySet created without a page size takes it from TSC_PAGE_SIZE."""
    with set_env(TSC_PAGE_SIZE="1000"):
        self.assertEqual(config.PAGE_SIZE, 1000)
        loop = self.server.workbooks.all()
        self.assertEqual(loop.request_options.pagesize, 1000)
Loading