From e228729d81367d3d3d42f66d3eac39ae16df1af4 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 5 Oct 2023 13:17:24 +0200 Subject: [PATCH 01/15] does not propagate source section when standalone resources are extracted to preserve their config sections --- dlt/common/configuration/specs/config_section_context.py | 4 +++- dlt/pipeline/pipeline.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/dlt/common/configuration/specs/config_section_context.py b/dlt/common/configuration/specs/config_section_context.py index e251b4f01b..753eb3b439 100644 --- a/dlt/common/configuration/specs/config_section_context.py +++ b/dlt/common/configuration/specs/config_section_context.py @@ -49,7 +49,9 @@ def resource_merge_style(incoming: "ConfigSectionContext", existing: "ConfigSect """If top level section is same and there are 3 sections it replaces second element (source module) from existing and keeps the 3rd element (name)""" incoming.pipeline_name = incoming.pipeline_name or existing.pipeline_name if len(incoming.sections) == 3 == len(existing.sections) and incoming.sections[0] == existing.sections[0]: - incoming.sections = (incoming.sections[0], existing.sections[1], incoming.sections[2]) + # existing does not have middle section then keep incoming + # standalone resources do not emit existing to not overwrite each other + incoming.sections = (incoming.sections[0], existing.sections[1] or incoming.sections[1], incoming.sections[2]) incoming.source_state_key = existing.source_state_key or incoming.source_state_key else: incoming.sections = incoming.sections or existing.sections diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index ccc51ddd31..c0a54ec6e7 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -822,8 +822,10 @@ def append_data(data_item: Any) -> None: elif isinstance(data_item, DltResource): # apply hints apply_hint_args(data_item) + # do not set section to prevent source that represent a standalone 
resource + # to overwrite other standalone resources (ie. parents) in that source sources.append( - DltSource(effective_schema.name, data_item.section or self.pipeline_name, effective_schema, [data_item]) + DltSource(effective_schema.name, "", effective_schema, [data_item]) ) else: # iterator/iterable/generator From c48641ea6d6d1168e098993ba105c181c6c25239 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 5 Oct 2023 13:19:11 +0200 Subject: [PATCH 02/15] name is not set to resource name in table template, must be set explicitly --- dlt/extract/decorators.py | 2 +- dlt/extract/schema.py | 37 +++++++++++++++++++++++-------------- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/dlt/extract/decorators.py b/dlt/extract/decorators.py index 84dfcb83f9..f9f62a23ff 100644 --- a/dlt/extract/decorators.py +++ b/dlt/extract/decorators.py @@ -308,7 +308,7 @@ def resource( """ def make_resource(_name: str, _section: str, _data: Any, incremental: IncrementalResourceWrapper = None) -> DltResource: table_template = DltResource.new_table_template( - table_name or _name, + table_name, write_disposition=write_disposition, columns=columns, primary_key=primary_key, diff --git a/dlt/extract/schema.py b/dlt/extract/schema.py index 80e9f6f32f..309b3ab075 100644 --- a/dlt/extract/schema.py +++ b/dlt/extract/schema.py @@ -29,20 +29,25 @@ class TTableSchemaTemplate(TypedDict, total=False): class DltResourceSchema: - def __init__(self, name: str, table_schema_template: TTableSchemaTemplate = None): - self.__qualname__ = self.__name__ = self._name = name + def __init__(self, table_schema_template: TTableSchemaTemplate = None): + self.__qualname__ = self.__name__ = self.name self._table_name_hint_fun: TFunHintTemplate[str] = None self._table_has_other_dynamic_hints: bool = False self._table_schema_template: TTableSchemaTemplate = None if table_schema_template: self.set_template(table_schema_template) + @property + def name(self) -> str: + pass + @property def 
table_name(self) -> TTableHintTemplate[str]: """Get table name to which resource loads data. May return a callable.""" if self._table_name_hint_fun: return self._table_name_hint_fun - return self._table_schema_template["name"] if self._table_schema_template else self._name + # get table name or default name + return self._table_schema_template.get("name") or self.name if self._table_schema_template else self.name @table_name.setter def table_name(self, value: TTableHintTemplate[str]) -> None: @@ -68,25 +73,27 @@ def columns(self) -> TTableHintTemplate[TTableSchemaColumns]: def compute_table_schema(self, item: TDataItem = None) -> TPartialTableSchema: """Computes the table schema based on hints and column definitions passed during resource creation. `item` parameter is used to resolve table hints based on data""" if not self._table_schema_template: - return new_table(self._name, resource=self._name) + return new_table(self.name, resource=self.name) # resolve a copy of a held template table_template = copy(self._table_schema_template) + if "name" not in table_template: + table_template["name"] = self.name table_template["columns"] = copy(self._table_schema_template["columns"]) # if table template present and has dynamic hints, the data item must be provided if self._table_name_hint_fun and item is None: - raise DataItemRequiredForDynamicTableHints(self._name) + raise DataItemRequiredForDynamicTableHints(self.name) # resolve resolved_template: TTableSchemaTemplate = {k: self._resolve_hint(item, v) for k, v in table_template.items()} # type: ignore resolved_template.pop("incremental", None) resolved_template.pop("validator", None) table_schema = self._merge_keys(resolved_template) - table_schema["resource"] = self._name + table_schema["resource"] = self.name validate_dict_ignoring_xkeys( spec=TPartialTableSchema, doc=table_schema, - path=f"new_table/{self._name}", + path=f"new_table/{self.name}", ) return table_schema @@ -123,7 +130,7 @@ def apply_hints( if table_name: 
t["name"] = table_name else: - t["name"] = self._name + t.pop("name", None) if parent_table_name is not None: if parent_table_name: t["parent"] = parent_table_name @@ -142,7 +149,8 @@ def apply_hints( # this updates all columns with defaults t["columns"] = update_dict_nested(t["columns"], columns) else: - t.pop("columns", None) + # set to empty columns + t["columns"] = ensure_table_schema_columns(columns) if primary_key is not None: if primary_key: @@ -162,7 +170,7 @@ def apply_hints( def set_template(self, table_schema_template: TTableSchemaTemplate) -> None: DltResourceSchema.validate_dynamic_hints(table_schema_template) # if "name" is callable in the template then the table schema requires actual data item to be inferred - name_hint = table_schema_template["name"] + name_hint = table_schema_template.get("name") if callable(name_hint): self._table_name_hint_fun = name_hint else: @@ -212,9 +220,6 @@ def new_table_template( primary_key: TTableHintTemplate[TColumnNames] = None, merge_key: TTableHintTemplate[TColumnNames] = None, ) -> TTableSchemaTemplate: - if not table_name: - raise TableNameMissing() - if columns is not None: validator = get_column_validator(columns) columns = ensure_table_schema_columns_hint(columns) @@ -226,6 +231,10 @@ def new_table_template( new_template: TTableSchemaTemplate = new_table( table_name, parent_table_name, write_disposition=write_disposition, columns=columns # type: ignore ) + if not table_name: + new_template.pop("name") + # always remove resource + new_template.pop("resource", None) # type: ignore if primary_key: new_template["primary_key"] = primary_key if merge_key: @@ -237,7 +246,7 @@ def new_table_template( @staticmethod def validate_dynamic_hints(template: TTableSchemaTemplate) -> None: - table_name = template["name"] + table_name = template.get("name") # if any of the hints is a function then name must be as well if any(callable(v) for k, v in template.items() if k not in ["name", "incremental", "validator"]) and not 
callable(table_name): raise InconsistentTableTemplate(f"Table name {table_name} must be a function if any other table hint is a function") From 434f47e9816a18bf66189e8edcb2a2883ba92cb3 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 5 Oct 2023 13:20:05 +0200 Subject: [PATCH 03/15] removes pipe id, removes separate resource name, defines how resources are cloned and added to the source after it is created --- dlt/extract/extract.py | 6 +- dlt/extract/pipe.py | 49 ++-- dlt/extract/source.py | 124 +++++----- tests/extract/test_decorators.py | 80 +------ tests/extract/test_extract.py | 36 +++ tests/extract/test_extract_pipe.py | 63 +++-- tests/extract/test_sources.py | 266 ++++++++++++++++++++-- tests/load/pipeline/test_restore_state.py | 4 +- 8 files changed, 434 insertions(+), 194 deletions(-) diff --git a/dlt/extract/extract.py b/dlt/extract/extract.py index 721c8b3f0a..5a7e2afa30 100644 --- a/dlt/extract/extract.py +++ b/dlt/extract/extract.py @@ -124,9 +124,7 @@ def _write_static_table(resource: DltResource, table_name: str) -> None: signals.raise_if_signalled() - # TODO: many resources may be returned. 
if that happens the item meta must be present with table name and this name must match one of resources - # if meta contains table name - resource = source.resources.find_by_pipe(pipe_item.pipe) + resource = source.resources[pipe_item.pipe.name] table_name: str = None if isinstance(pipe_item.meta, TableNameMeta): table_name = pipe_item.meta.table_name @@ -187,7 +185,7 @@ def extract_with_schema( for resource in source.resources.extracted.values(): with contextlib.suppress(DataItemRequiredForDynamicTableHints): if resource.write_disposition == "replace": - _reset_resource_state(resource._name) + _reset_resource_state(resource.name) extractor = extract(extract_id, source, storage, collector, max_parallel_items=max_parallel_items, workers=workers) # iterate over all items in the pipeline and update the schema if dynamic table hints were present diff --git a/dlt/extract/pipe.py b/dlt/extract/pipe.py index 076678cbb0..c24d7433c1 100644 --- a/dlt/extract/pipe.py +++ b/dlt/extract/pipe.py @@ -6,7 +6,7 @@ from concurrent.futures import ThreadPoolExecutor from copy import copy from threading import Thread -from typing import Any, ContextManager, Optional, Sequence, Union, Callable, Iterable, Iterator, List, NamedTuple, Awaitable, Tuple, Type, TYPE_CHECKING, Literal +from typing import Any, ContextManager, Dict, Optional, Sequence, Union, Callable, Iterable, Iterator, List, NamedTuple, Awaitable, Tuple, Type, TYPE_CHECKING, Literal from dlt.common import sleep from dlt.common.configuration import configspec @@ -105,7 +105,6 @@ def __init__(self, name: str, steps: List[TPipeStep] = None, parent: "Pipe" = No self.name = name self._gen_idx = 0 self._steps: List[TPipeStep] = [] - self._pipe_id = f"{name}_{id(self)}" self.parent = parent # add the steps, this will check and mod transformations if steps: @@ -407,14 +406,21 @@ def _ensure_transform_step(self, step_no: int, step: TPipeStep) -> None: else: raise InvalidStepFunctionArguments(self.name, callable_name, sig, str(ty_ex)) 
- def _clone(self, keep_pipe_id: bool = True, new_name: str = None) -> "Pipe": - """Clones the pipe steps, optionally keeping the pipe id or renaming the pipe. Used internally to clone a list of connected pipes.""" - assert not (new_name and keep_pipe_id), "Cannot keep pipe id when renaming the pipe" - p = Pipe(new_name or self.name, [], self.parent) + def _clone(self, new_name: str = None, with_parent: bool = False) -> "Pipe": + """Clones the pipe steps, optionally renaming the pipe. Used internally to clone a list of connected pipes.""" + new_parent = self.parent + if with_parent and self.parent and not self.parent.is_empty: + parent_new_name = new_name + if new_name: + # if we are renaming the pipe, then also rename the parent + if self.name in self.parent.name: + parent_new_name = self.parent.name.replace(self.name, new_name) + else: + parent_new_name = f"{self.parent.name}_{new_name}" + new_parent = self.parent._clone(parent_new_name, with_parent) + + p = Pipe(new_name or self.name, [], new_parent) p._steps = self._steps.copy() - # clone shares the id with the original - if keep_pipe_id: - p._pipe_id = self._pipe_id return p def __repr__(self) -> str: @@ -422,7 +428,7 @@ def __repr__(self) -> str: bound_str = " data bound to " + repr(self.parent) else: bound_str = "" - return f"Pipe {self.name} ({self._pipe_id})[steps: {len(self._steps)}] at {id(self)}{bound_str}" + return f"Pipe {self.name} [steps: {len(self._steps)}] at {id(self)}{bound_str}" class PipeIterator(Iterator[PipeItem]): @@ -487,7 +493,7 @@ def from_pipes( # print(f"max_parallel_items: {max_parallel_items} workers: {workers}") extract = cls(max_parallel_items, workers, futures_poll_interval, next_item_mode) # clone all pipes before iterating (recursively) as we will fork them (this add steps) and evaluate gens - pipes = PipeIterator.clone_pipes(pipes) + pipes, _ = PipeIterator.clone_pipes(pipes) def _fork_pipeline(pipe: Pipe) -> None: @@ -709,7 +715,7 @@ def _get_source_item_current(self) -> 
ResolvablePipeItem: try: # get items from last added iterator, this makes the overall Pipe as close to FIFO as possible gen, step, pipe, meta = self._sources[-1] - # print(f"got {pipe.name} {pipe._pipe_id}") + # print(f"got {pipe.name}") # register current pipe name during the execution of gen set_current_pipe_name(pipe.name) item = None @@ -743,7 +749,7 @@ def _get_source_item_round_robin(self) -> ResolvablePipeItem: if sources_count > self._initial_sources_count: return self._get_source_item_current() try: - # print(f"got {pipe.name} {pipe._pipe_id}") + # print(f"got {pipe.name}") # register current pipe name during the execution of gen item = None while item is None: @@ -775,10 +781,12 @@ def _get_source_item_round_robin(self) -> ResolvablePipeItem: raise ResourceExtractionError(pipe.name, gen, str(ex), "generator") from ex @staticmethod - def clone_pipes(pipes: Sequence[Pipe]) -> List[Pipe]: + def clone_pipes(pipes: Sequence[Pipe], existing_cloned_pairs: Dict[int, Pipe] = None) -> Tuple[List[Pipe], Dict[int, Pipe]]: """This will clone pipes and fix the parent/dependent references""" - cloned_pipes = [p._clone() for p in pipes] + cloned_pipes = [p._clone() for p in pipes if id(p) not in (existing_cloned_pairs or {})] cloned_pairs = {id(p): c for p, c in zip(pipes, cloned_pipes)} + if existing_cloned_pairs: + cloned_pairs.update(existing_cloned_pairs) for clone in cloned_pipes: while True: @@ -788,16 +796,17 @@ def clone_pipes(pipes: Sequence[Pipe]) -> List[Pipe]: if clone.parent in cloned_pairs.values(): break # clone if parent pipe not yet cloned - if id(clone.parent) not in cloned_pairs: + parent_id = id(clone.parent) + if parent_id not in cloned_pairs: # print("cloning:" + clone.parent.name) - cloned_pairs[id(clone.parent)] = clone.parent._clone() + cloned_pairs[parent_id] = clone.parent._clone() # replace with clone # print(f"replace depends on {clone.name} to {clone.parent.name}") - clone.parent = cloned_pairs[id(clone.parent)] - # recurr with clone + 
clone.parent = cloned_pairs[parent_id] + # recur with clone clone = clone.parent - return cloned_pipes + return cloned_pipes, cloned_pairs class ManagedPipeIterator(PipeIterator): diff --git a/dlt/extract/source.py b/dlt/extract/source.py index ecdb0f1993..a08c584598 100644 --- a/dlt/extract/source.py +++ b/dlt/extract/source.py @@ -46,7 +46,6 @@ def __init__( incremental: IncrementalResourceWrapper = None, section: str = None ) -> None: - self._name = pipe.name self.section = section self.selected = selected self._pipe = pipe @@ -54,7 +53,7 @@ def __init__( if incremental and not self.incremental: self.add_step(incremental) self.source_name = None - super().__init__(self._name, table_schema_template) + super().__init__(table_schema_template) @classmethod def from_data( @@ -110,11 +109,11 @@ def from_data( @property def name(self) -> str: """Resource name inherited from the pipe""" - return self._name + return self._pipe.name def with_name(self, new_name: str) -> "DltResource": """Clones the resource with a new name. 
Such resource keeps separate state and loads data to `new_name` table by default.""" - return self.clone(new_name=new_name) + return self._clone(new_name=new_name) @property def is_transformer(self) -> bool: @@ -160,16 +159,16 @@ def validator(self, validator: Optional[ValidateItem]) -> None: def pipe_data_from(self, data_from: Union["DltResource", Pipe]) -> None: """Replaces the parent in the transformer resource pipe from which the data is piped.""" if self.is_transformer: - DltResource._ensure_valid_transformer_resource(self._name, self._pipe.gen) + DltResource._ensure_valid_transformer_resource(self.name, self._pipe.gen) else: - raise ResourceNotATransformer(self._name, "Cannot pipe data into resource that is not a transformer.") - parent_pipe = self._get_parent_pipe(self._name, data_from) + raise ResourceNotATransformer(self.name, "Cannot pipe data into resource that is not a transformer.") + parent_pipe = self._get_parent_pipe(self.name, data_from) self._pipe.parent = parent_pipe def add_pipe(self, data: Any) -> None: """Creates additional pipe for the resource from the specified data""" # TODO: (1) self resource cannot be a transformer (2) if data is resource both self must and it must be selected/unselected + cannot be tranformer - raise InvalidResourceDataTypeMultiplePipes(self._name, data, type(data)) + raise InvalidResourceDataTypeMultiplePipes(self.name, data, type(data)) def select_tables(self, *table_names: Iterable[str]) -> "DltResource": """For resources that dynamically dispatch data to several tables allows to select tables that will receive data, effectively filtering out other data items. @@ -328,12 +327,12 @@ def state(self) -> StrAny: with inject_section(self._get_config_section_context()): return resource_state(self.name) - def clone(self, clone_pipe: bool = True, new_name: str = None) -> "DltResource": - """Creates a deep copy of a current resource, optionally renaming the resource (and cloning pipe). 
Note that name of a containing source will not be cloned.""" - assert not (new_name and not clone_pipe), "Must clone pipe when changing name" + def _clone(self, new_name: str = None) -> "DltResource": + """Creates a deep copy of a current resource, optionally renaming the resource. The clone will not be part of the source + """ pipe = self._pipe - if self._pipe and not self._pipe.is_empty and clone_pipe: - pipe = pipe._clone(keep_pipe_id=False, new_name=new_name) + if self._pipe and not self._pipe.is_empty: + pipe = pipe._clone(new_name=new_name, with_parent=True) # incremental and parent are already in the pipe (if any) return DltResource( pipe, @@ -346,7 +345,7 @@ def __call__(self, *args: Any, **kwargs: Any) -> "DltResource": """Binds the parametrized resources to passed arguments. Creates and returns a bound resource. Generators and iterators are not evaluated.""" if self._bound: raise TypeError("Bound DltResource object is not callable") - r = self.clone(clone_pipe=True) + r = self._clone() return r.bind(*args, **kwargs) def __or__(self, transform: Union["DltResource", AnyFun]) -> "DltResource": @@ -398,12 +397,14 @@ def _get_config_section_context(self) -> ConfigSectionContext: default_schema_name = pipeline._make_schema_with_default_name().name return ConfigSectionContext( pipeline_name=pipeline_name, - sections=(known_sections.SOURCES, self.section or default_schema_name or uniq_id(), self.source_name or default_schema_name or self._name), + # do not emit middle config section to not overwrite the resource section + # only sources emit middle config section + sections=(known_sections.SOURCES, "", self.source_name or default_schema_name or self.name), source_state_key=self.source_name or default_schema_name or self.section or uniq_id() ) def __str__(self) -> str: - info = f"DltResource [{self._name}]" + info = f"DltResource [{self.name}]" if self.section: info += f" in section [{self.section}]" if self.source_name: @@ -422,7 +423,7 @@ def __str__(self) -> 
str: info += "\nIf you want to see the data items in the resource you must iterate it or convert to list ie. list(resource). Note that, like any iterator, you can iterate the resource only once." else: info += "\nThis resource is not bound to the data" - info += f"\nInstance: info: (data pipe id:{self._pipe._pipe_id}) at {id(self)}" + info += f"\nInstance: info: (data pipe id:{id(self._pipe)}) at {id(self)}" return info @staticmethod @@ -478,8 +479,11 @@ def __init__(self, source_name: str, source_section: str) -> None: super().__init__() self.source_name = source_name self.source_section = source_section - self._recently_added: List[DltResource] = [] - self._known_pipes: Dict[str, DltResource] = {} + self._suppress_clone_on_setitem = False + # pipes not yet cloned in __setitem__ + self._new_pipes: List[Pipe] = [] + # pipes already cloned by __setitem__ id(original Pipe):cloned(Pipe) + self._cloned_pairs: Dict[int, Pipe] = {} @property def selected(self) -> Dict[str, DltResource]: @@ -497,7 +501,7 @@ def extracted(self) -> Dict[str, DltResource]: while (pipe := resource._pipe.parent) is not None: if not pipe.is_empty: try: - resource = self.find_by_pipe(pipe) + resource = self[pipe.name] except KeyError: # resource for pipe not found: return mock resource mock_template = DltResourceSchema.new_table_template( @@ -506,7 +510,7 @@ def extracted(self) -> Dict[str, DltResource]: ) resource = DltResource(pipe, mock_template, False, section=resource.section) resource.source_name = resource.source_name - extracted[resource._name] = resource + extracted[resource.name] = resource else: break return extracted @@ -532,12 +536,10 @@ def selected_dag(self) -> List[Tuple[str, str]]: @property def pipes(self) -> List[Pipe]: - # TODO: many resources may share the same pipe so return ordered set return [r._pipe for r in self.values()] @property def selected_pipes(self) -> Sequence[Pipe]: - # TODO: many resources may share the same pipe so return ordered set return [r._pipe for r in 
self.values() if r.selected] def select(self, *resource_names: str) -> Dict[str, DltResource]: @@ -548,34 +550,56 @@ def select(self, *resource_names: str) -> Dict[str, DltResource]: raise ResourcesNotFoundError(self.source_name, set(self.keys()), set(resource_names)) # set the selected flags for resource in self.values(): - self[resource._name].selected = resource._name in resource_names + self[resource.name].selected = resource.name in resource_names return self.selected - def find_by_pipe(self, pipe: Pipe) -> DltResource: - # TODO: many resources may share the same pipe so return a list and also filter the resources by self._enabled_resource_names - # identify pipes by _pipe_id - if pipe._pipe_id in self._known_pipes: - return self._known_pipes[pipe._pipe_id] + def add(self, resources: Sequence[DltResource]) -> None: try: - return self._known_pipes.setdefault(pipe._pipe_id, next(r for r in self.values() if r._pipe._pipe_id == pipe._pipe_id)) - except StopIteration: - raise KeyError(pipe) - - def clone_new_pipes(self) -> None: - cloned_pipes = ManagedPipeIterator.clone_pipes([r._pipe for r in self.values() if r in self._recently_added]) + # temporarily block cloning when single resource is added + self._suppress_clone_on_setitem = True + for resource in resources: + if resource.name in self: + # for resources with the same name try to add the resource as an another pipe + self[resource.name].add_pipe(resource) + else: + self[resource.name] = resource + finally: + self._suppress_clone_on_setitem = False + self._clone_new_pipes([r.name for r in resources]) + + def _clone_new_pipes(self, resource_names: Sequence[str]) -> None: + # clone all new pipes and keep + _, self._cloned_pairs = ManagedPipeIterator.clone_pipes(self._new_pipes, self._cloned_pairs) + # self._cloned_pairs.update(cloned_pairs) # replace pipes in resources, the cloned_pipes preserve parent connections - for cloned in cloned_pipes: - self.find_by_pipe(cloned)._pipe = cloned - 
self._recently_added.clear() + for name in resource_names: + resource = self[name] + pipe_id = id(resource._pipe) + if pipe_id in self._cloned_pairs: + resource._pipe = self._cloned_pairs[pipe_id] + self._new_pipes.clear() def __setitem__(self, resource_name: str, resource: DltResource) -> None: + if resource_name != resource.name: + raise ValueError(f"The index name {resource_name} does not correspond to resource name {resource.name}") + pipe_id = id(resource._pipe) # make shallow copy of the resource resource = copy(resource) - resource.section = self.source_section + # resource.section = self.source_section resource.source_name = self.source_name + if pipe_id in self._cloned_pairs: + # if resource_name in self: + # raise ValueError(f"Resource with name {resource_name} and pipe id {id(pipe_id)} is already present in the source. " + # "Modify the resource pipe directly instead of setting a possibly modified instance.") + # TODO: instead of replacing pipe with existing one we should clone and replace the existing one in all resources that have it + resource._pipe = self._cloned_pairs[pipe_id] + else: + self._new_pipes.append(resource._pipe) # now set it in dict - self._recently_added.append(resource) - return super().__setitem__(resource_name, resource) + super().__setitem__(resource_name, resource) + # immediately clone pipe if not suppressed + if not self._suppress_clone_on_setitem: + self._clone_new_pipes([resource.name]) def __delitem__(self, resource_name: str) -> None: raise DeletingResourcesNotSupported(self.source_name, resource_name) @@ -605,9 +629,7 @@ def __init__(self, name: str, section: str, schema: Schema, resources: Sequence[ warnings.warn(f"Schema name {schema.name} differs from source name {name}! 
The explicit source name argument is deprecated and will be soon removed.") if resources: - for resource in resources: - self._add_resource(resource._name, resource) - self._resources.clone_new_pipes() + self.resources.add(resources) @classmethod def from_data(cls, name: str, section: str, schema: Schema, data: Any) -> "DltSource": @@ -780,16 +802,6 @@ def _get_config_section_context(self) -> ConfigSectionContext: source_state_key=self.name ) - def _add_resource(self, name: str, resource: DltResource) -> None: - if self.exhausted: - raise SourceExhausted(self.name) - - if name in self._resources: - # for resources with the same name try to add the resource as an another pipe - self._resources[name].add_pipe(resource) - else: - self._resources[name] = resource - def __getattr__(self, resource_name: str) -> DltResource: return self._resources[resource_name] @@ -805,9 +817,9 @@ def __str__(self) -> str: for r in self.resources.values(): selected_info = "selected" if r.selected else "not selected" if r.is_transformer: - info += f"\ntransformer {r._name} is {selected_info} and takes data from {r._pipe.parent.name}" + info += f"\ntransformer {r.name} is {selected_info} and takes data from {r._pipe.parent.name}" else: - info += f"\nresource {r._name} is {selected_info}" + info += f"\nresource {r.name} is {selected_info}" if self.exhausted: info += "\nSource is already iterated and cannot be used again ie. to display or load data." 
else: diff --git a/tests/extract/test_decorators.py b/tests/extract/test_decorators.py index c1a1f7c937..aa3cd27619 100644 --- a/tests/extract/test_decorators.py +++ b/tests/extract/test_decorators.py @@ -104,7 +104,7 @@ def empty_t_1(items, _meta): # here we bind the bound_r to empty_t_1 and then evaluate gen on a clone pipe which fails with pytest.raises(ParametrizedResourceUnbound): - (bound_r | empty_t_1)._pipe._clone(keep_pipe_id=False).evaluate_gen() + (bound_r | empty_t_1)._pipe._clone().evaluate_gen() # here we still have original (non cloned) pipe where gen was not evaluated assert list(empty_t_1("_meta")) == [1, 2, 3, 1, 2, 3, 1, 2, 3] @@ -219,83 +219,7 @@ def get_users(): # delete columns by passing empty users.apply_hints(columns={}) - assert users.columns is None - - -def test_apply_hints() -> None: - @dlt.resource - def empty(): - yield [1, 2, 3] - - empty_r = empty() - assert empty_r.write_disposition == "append" - empty_r.apply_hints(write_disposition="replace") - assert empty_r.write_disposition == "replace" - empty_r.write_disposition = "merge" - assert empty_r.compute_table_schema()["write_disposition"] == "merge" - # delete hint - empty_r.apply_hints(write_disposition="") - empty_r.write_disposition = "append" - assert empty_r.compute_table_schema()["write_disposition"] == "append" - - empty_r.apply_hints(table_name="table", parent_table_name="parent", primary_key=["a", "b"], merge_key=["c", "a"]) - table = empty_r.compute_table_schema() - assert table["columns"]["a"] == {'merge_key': True, 'name': 'a', 'nullable': False, 'primary_key': True} - assert table["columns"]["b"] == {'name': 'b', 'nullable': False, 'primary_key': True} - assert table["columns"]["c"] == {'merge_key': True, 'name': 'c', 'nullable': False} - assert table["name"] == "table" - assert table["parent"] == "parent" - - # reset - empty_r.apply_hints(table_name="", parent_table_name="", primary_key=[], merge_key="") - table = empty_r.compute_table_schema() - assert table["name"] 
== "empty" - assert "parent" not in table - assert table["columns"] == {} - - # combine columns with primary key - - empty_r = empty() - empty_r.apply_hints(columns={"tags": {"data_type": "complex", "primary_key": False}}, primary_key="tags", merge_key="tags") - # primary key not set here - assert empty_r.columns["tags"] == {"data_type": "complex", "name": "tags", "primary_key": False} - # only in the computed table - assert empty_r.compute_table_schema()["columns"]["tags"] == {"data_type": "complex", "name": "tags", "primary_key": True, "merge_key": True} - - -def test_apply_dynamic_hints() -> None: - @dlt.resource - def empty(): - yield [1, 2, 3] - - empty_r = empty() - with pytest.raises(InconsistentTableTemplate): - empty_r.apply_hints(parent_table_name=lambda ev: ev["p"]) - - empty_r.apply_hints(table_name=lambda ev: ev["t"], parent_table_name=lambda ev: ev["p"]) - assert empty_r._table_name_hint_fun is not None - assert empty_r._table_has_other_dynamic_hints is True - - with pytest.raises(DataItemRequiredForDynamicTableHints): - empty_r.compute_table_schema() - table = empty_r.compute_table_schema({"t": "table", "p": "parent"}) - assert table["name"] == "table" - assert table["parent"] == "parent" - - # try write disposition and primary key - empty_r.apply_hints(primary_key=lambda ev: ev["pk"], write_disposition=lambda ev: ev["wd"]) - table = empty_r.compute_table_schema({"t": "table", "p": "parent", "pk": ["a", "b"], "wd": "skip"}) - assert table["write_disposition"] == "skip" - assert "a" in table["columns"] - - # validate fails - with pytest.raises(DictValidationException): - empty_r.compute_table_schema({"t": "table", "p": "parent", "pk": ["a", "b"], "wd": "x-skip"}) - - # dynamic columns - empty_r.apply_hints(columns=lambda ev: ev["c"]) - table = empty_r.compute_table_schema({"t": "table", "p": "parent", "pk": ["a", "b"], "wd": "skip", "c": [{"name": "tags"}]}) - assert table["columns"]["tags"] == {"name": "tags"} + assert users.columns == {} def 
test_columns_from_pydantic() -> None: diff --git a/tests/extract/test_extract.py b/tests/extract/test_extract.py index 530a089f1c..116d7bd0ba 100644 --- a/tests/extract/test_extract.py +++ b/tests/extract/test_extract.py @@ -72,3 +72,39 @@ def table_name_with_lambda(_range): schema = expect_tables(table_name_with_lambda) assert "table_name_with_lambda" not in schema.tables + + +# def test_extract_pipe_from_unknown_resource(): +# pass + + +def test_extract_shared_pipe(): + def input_gen(): + yield from [1, 2, 3] + + input_r = DltResource.from_data(input_gen) + source = DltSource("selectables", "module", dlt.Schema("selectables"), [input_r, input_r.with_name("gen_clone")]) + storage = ExtractorStorage(NormalizeStorageConfiguration()) + extract_id = storage.create_extract_id() + schema_update = extract(extract_id, source, storage) + print(schema_update) + + +def test_extract_renamed_clone_and_parent(): + def input_gen(): + yield from [1, 2, 3] + + def tx_step(item): + return item*2 + + input_r = DltResource.from_data(input_gen) + input_tx = DltResource.from_data(tx_step, data_from=DltResource.Empty) + + source = DltSource("selectables", "module", dlt.Schema("selectables"), [input_r, (input_r | input_tx).with_name("tx_clone")]) + storage = ExtractorStorage(NormalizeStorageConfiguration()) + extract_id = storage.create_extract_id() + schema_update = extract(extract_id, source, storage) + assert "input_gen" in schema_update + assert "tx_clone" in schema_update + # mind that pipe name of the evaluated parent will have different name than the resource + assert source.tx_clone._pipe.parent.name == "input_gen_tx_clone" diff --git a/tests/extract/test_extract_pipe.py b/tests/extract/test_extract_pipe.py index a983f4ad26..a4e894bf94 100644 --- a/tests/extract/test_extract_pipe.py +++ b/tests/extract/test_extract_pipe.py @@ -95,10 +95,6 @@ def get_pipes(): assert time.time() - started < 0.8 - - - - def test_add_step() -> None: data = [1, 2, 3] data_iter = iter(data) @@ -284,10 
+280,10 @@ def item_meta_step_trans(item: int, meta): t.append_step(item_meta_step_trans) # type: ignore[arg-type] _l = list(PipeIterator.from_pipes([p, t], yield_parents=True)) # same result for transformer - tran_l = [pi for pi in _l if pi.pipe._pipe_id == t._pipe_id] + tran_l = [pi for pi in _l if pi.pipe.name == t.name] assert [int((pi.item//2)**0.5//2) for pi in tran_l] == data # type: ignore[operator] assert [pi.meta for pi in tran_l] == _meta - data_l = [pi for pi in _l if pi.pipe._pipe_id == p._pipe_id] + data_l = [pi for pi in _l if pi.pipe.name is p.name] # data pipe went only through one transformation assert [int(pi.item//2) for pi in data_l] == data # type: ignore[operator] assert [pi.meta for pi in data_l] == _meta @@ -479,6 +475,47 @@ def test_pipe_copy_on_fork() -> None: assert elems[0].item is not elems[1].item +def test_clone_single_pipe() -> None: + doc = {"e": 1, "l": 2} + parent = Pipe.from_data("data", [doc]) + + # default clone + cloned_p = parent._clone() + assert cloned_p.name == parent.name + assert cloned_p != parent + assert id(cloned_p.steps) != id(parent.steps) + assert cloned_p.gen == parent.gen + cloned_p = parent._clone(with_parent=True) + assert cloned_p != parent + # with rename + cloned_p = parent._clone(new_name="new_name") + assert cloned_p.name == "new_name" + assert id(cloned_p.steps) != id(parent.steps) + + # add child + child1 = Pipe("tr1", [lambda x: x], parent=parent) + child2 = Pipe("tr2", [lambda x: x], parent=child1) + + # clone child without parent + cloned_ch2 = child2._clone() + assert cloned_ch2.parent == child1 + cloned_ch2 = child2._clone(new_name="new_child_2") + assert cloned_ch2.name == "new_child_2" + assert cloned_ch2.parent == child1 + assert cloned_ch2.parent.name == child1.name + + # clone child with parent + cloned_ch2 = child2._clone(with_parent=True, new_name="new_child_2") + assert cloned_ch2.parent != child1 + assert cloned_ch2.parent.name == "tr1_new_child_2" + assert cloned_ch2.parent.parent != 
parent + assert cloned_ch2.parent.parent.name == "data_tr1_new_child_2" + # rename again + cloned_ch2_2 = cloned_ch2._clone(with_parent=True, new_name="a_new_name") + assert cloned_ch2_2.parent.name == "tr1_a_new_name" + assert cloned_ch2_2.parent.parent.name == "data_tr1_a_new_name" + + def test_clone_pipes() -> None: def pass_gen(item, meta): @@ -494,28 +531,24 @@ def pass_gen(item, meta): # pass all pipes explicitly pipes = [p1, p2, p1_p3, p1_p4, p2_p5, p5_p6] - cloned_pipes = PipeIterator.clone_pipes(pipes) + cloned_pipes, _ = PipeIterator.clone_pipes(pipes) assert_cloned_pipes(pipes, cloned_pipes) # clone only two top end pipes, still all parents must be cloned as well pipes = [p1_p4, p5_p6] - cloned_pipes = PipeIterator.clone_pipes(pipes) + cloned_pipes, _ = PipeIterator.clone_pipes(pipes) assert_cloned_pipes(pipes, cloned_pipes) c_p5_p6 = cloned_pipes[-1] assert c_p5_p6.parent.parent is not p2 - assert c_p5_p6.parent.parent._pipe_id == p2._pipe_id - - # try circular deps - + assert c_p5_p6.parent.parent.name == p2.name -def assert_cloned_pipes(pipes: List[Pipe], cloned_pipes: List[Pipe]): +def assert_cloned_pipes(pipes: List[Pipe], cloned_pipes: List[Pipe]) -> None: # clones pipes must be separate instances but must preserve pipe id and names for pipe, cloned_pipe in zip(pipes, cloned_pipes): while True: assert pipe is not cloned_pipe assert pipe.name == cloned_pipe.name - assert pipe._pipe_id == cloned_pipe._pipe_id assert pipe.has_parent == cloned_pipe.has_parent # check all the parents @@ -540,7 +573,7 @@ def pass_gen(item, meta): pipes = [c_p1_p3, c_p1_p4] # can be cloned - cloned_pipes = PipeIterator.clone_pipes(pipes) + cloned_pipes, _ = PipeIterator.clone_pipes(pipes) # cannot be evaluated with pytest.raises(RecursionError): diff --git a/tests/extract/test_sources.py b/tests/extract/test_sources.py index 123768274c..93afce8f80 100644 --- a/tests/extract/test_sources.py +++ b/tests/extract/test_sources.py @@ -5,14 +5,14 @@ import dlt from 
dlt.common.configuration.container import Container -from dlt.common.exceptions import PipelineStateNotAvailable +from dlt.common.exceptions import DictValidationException, PipelineStateNotAvailable from dlt.common.pipeline import StateInjectableContext, source_state from dlt.common.schema import Schema from dlt.common.typing import TDataItems -from dlt.extract.exceptions import InvalidParentResourceDataType, InvalidParentResourceIsAFunction, InvalidTransformerDataTypeGeneratorFunctionRequired, InvalidTransformerGeneratorFunction, ParametrizedResourceUnbound, ResourcesNotFoundError +from dlt.extract.exceptions import DataItemRequiredForDynamicTableHints, InconsistentTableTemplate, InvalidParentResourceDataType, InvalidParentResourceIsAFunction, InvalidResourceDataTypeMultiplePipes, InvalidTransformerDataTypeGeneratorFunctionRequired, InvalidTransformerGeneratorFunction, ParametrizedResourceUnbound, ResourcesNotFoundError from dlt.extract.pipe import Pipe from dlt.extract.typing import FilterItem, MapItem -from dlt.extract.source import DltResource, DltSource +from dlt.extract.source import DltResource, DltResourceDict, DltSource def test_call_data_resource() -> None: @@ -213,8 +213,6 @@ def regular(_input): # resource is different instance assert regular is not b_regular assert regular._pipe is not b_regular._pipe - # pipe has different id - assert regular._pipe._pipe_id != b_regular._pipe._pipe_id # pipe is replaced on resource returning resource (new pipe created) returns_res.add_filter(lambda x: x == "A") @@ -287,7 +285,7 @@ def some_data(param: str): # create two resource instances and extract in single ad hoc resource data1 = some_data("state1") - data1._name = "state1_data" + data1._pipe.name = "state1_data" dlt.pipeline(full_refresh=True).extract([data1, some_data("state2")], schema=Schema("default")) # both should be extracted. 
what we test here is the combination of binding the resource by calling it that clones the internal pipe # and then creating a source with both clones. if we keep same pipe id when cloning on call, a single pipe would be created shared by two resources @@ -421,8 +419,8 @@ def _gen(i): # resource is a clone assert s.resources[name] is not clone_s.resources[name] assert s.resources[name]._pipe is not clone_s.resources[name]._pipe - # but we keep pipe ids - assert s.resources[name]._pipe._pipe_id == clone_s.resources[name]._pipe._pipe_id + # but we keep pipe names + assert s.resources[name].name == clone_s.resources[name].name assert list(s) == ['', 'A', 'AA', 'AAA'] # we expired generators @@ -823,13 +821,78 @@ def test_source(): # pass -@pytest.mark.skip("not implemented") -def test_resource_dict() -> None: - # the dict of resources in source - # test clone - # test delete - - pass +def test_resource_dict_add() -> None: + def input_gen(): + yield from [1, 2, 3] + + def tx_step(item): + return item*2 + + res_dict = DltResourceDict("source", "section") + input_r = DltResource.from_data(input_gen) + input_r_orig_pipe = input_r._pipe + input_tx = DltResource.from_data(tx_step, data_from=DltResource.Empty) + input_tx_orig_pipe = input_tx._pipe + + res_dict["tx_step"] = input_r | input_tx + # pipes cloned on setter + assert res_dict["tx_step"] is not input_tx + assert res_dict["tx_step"]._pipe is not input_tx._pipe + # pipes in original resources not touched + assert input_r_orig_pipe == input_r._pipe + assert input_tx_orig_pipe == input_tx._pipe + + # now add the parent + res_dict["input_gen"] = input_r + # got cloned + assert res_dict["input_gen"] is not input_r + # but the clone points to existing parent + assert res_dict["input_gen"]._pipe is res_dict["tx_step"]._pipe.parent + assert res_dict._new_pipes == [] + assert len(res_dict._cloned_pairs) == 2 + res_dict["tx_clone"] = (input_r | input_tx).with_name("tx_clone") + assert res_dict["tx_clone"]._pipe.parent is not 
res_dict["input_gen"]._pipe + assert len(res_dict._cloned_pairs) == 4 + assert input_r_orig_pipe == input_r._pipe + assert input_tx_orig_pipe == input_tx._pipe + + + # add all together + res_dict = DltResourceDict("source", "section") + res_dict.add([input_r , input_r | input_tx]) + assert res_dict._new_pipes == [] + assert res_dict._suppress_clone_on_setitem is False + assert res_dict["input_gen"]._pipe is res_dict["tx_step"]._pipe.parent + # pipes in original resources not touched + assert input_r_orig_pipe == input_r._pipe + assert input_tx_orig_pipe == input_tx._pipe + + + # replace existing resource which has the old pipe + res_dict["input_gen"] = input_r + # an existing clone got assigned + assert res_dict["input_gen"]._pipe is res_dict["tx_step"]._pipe.parent + # keep originals + assert input_r_orig_pipe == input_r._pipe + assert input_tx_orig_pipe == input_tx._pipe + + # replace existing resource which has the new pipe + res_dict["input_gen"] = input_r() + # we have disconnected gen and parent of tx TODO: we should handle this + assert res_dict["input_gen"]._pipe is not res_dict["tx_step"]._pipe.parent + # keep originals + assert input_r_orig_pipe == input_r._pipe + assert input_tx_orig_pipe == input_tx._pipe + + + + # can't set with different name than resource really has + with pytest.raises(ValueError): + res_dict["input_gen_x"] = input_r.with_name("uniq") + + # can't add resource with same name again + with pytest.raises(InvalidResourceDataTypeMultiplePipes): + res_dict.add([input_r]) def test_source_multiple_iterations() -> None: @@ -907,18 +970,183 @@ def _t1(items, suffix): # new name of resource and pipe assert r1_clone.name == "r1_clone" assert r1_clone._pipe.name == "r1_clone" + assert r1_clone.table_name == "r1_clone" # original keeps old name and pipe - assert r1._pipe != r1_clone._pipe + assert r1._pipe is not r1_clone._pipe assert r1.name == "_r1" + assert r1.table_name == "_r1" - # clone transformer + # clone transformer before it is bound 
bound_t1_clone = r1_clone | _t1.with_name("t1_clone")("ax") bound_t1_clone_2 = r1_clone | _t1("ax_2").with_name("t1_clone_2") assert bound_t1_clone.name == "t1_clone" assert bound_t1_clone_2.name == "t1_clone_2" - # but parent is the same - assert bound_t1_clone_2._pipe.parent == bound_t1_clone._pipe.parent + assert bound_t1_clone.table_name == "t1_clone" + assert bound_t1_clone_2.table_name == "t1_clone_2" + # but parent is the same (we cloned only transformer - before it is bound) + assert bound_t1_clone_2._pipe.parent is bound_t1_clone._pipe.parent # evaluate transformers assert list(bound_t1_clone) == ['a_ax', 'b_ax', 'c_ax'] assert list(bound_t1_clone_2) == ['a_ax_2', 'b_ax_2', 'c_ax_2'] + + # clone pipes (bound transformer) + pipe_r1 = _r1() + pipe_t1 = _t1("cx") + pipe_r1_t1 = pipe_r1 | pipe_t1 + pipe_r1_t1_clone = pipe_r1_t1.with_name("pipe_clone") + assert pipe_r1_t1_clone.name == "pipe_clone" + # parent of the pipe also cloned and renamed + assert pipe_r1_t1_clone._pipe.parent.name == "_r1_pipe_clone" + # originals are not affected + assert pipe_r1.name == "_r1" + assert pipe_t1.name == "_t1" + # binding a transformer is not cloning the original + assert pipe_t1._pipe is pipe_r1_t1._pipe + assert pipe_r1._pipe is pipe_r1_t1._pipe.parent + # with_name clones + assert pipe_t1._pipe is not pipe_r1_t1_clone._pipe + assert pipe_r1._pipe is not pipe_r1_t1_clone._pipe.parent + + # rename again + pipe_r1_t1_clone_2 = pipe_r1_t1_clone.with_name("pipe_clone_2") + # replace previous name part (pipe_clone in _r1_pipe_clone) with pipe_clone_2 + assert pipe_r1_t1_clone_2._pipe.parent.name == "_r1_pipe_clone_2" + + # preserves table name if set + table_t1 = _r1 | _t1 + table_t1.table_name = "Test_Table" + table_t1_clone = table_t1.with_name("table_t1_clone") + assert table_t1_clone.name == "table_t1_clone" + assert table_t1_clone.table_name == "Test_Table" + + # also preserves when set the same name as resource name + assert table_t1.name == "_t1" + table_t1.table_name 
= "_t1" + table_t1_clone = table_t1.with_name("table_t1_clone") + assert table_t1_clone.name == "table_t1_clone" + assert table_t1_clone.table_name == "_t1" + + +def test_apply_hints() -> None: + def empty_gen(): + yield [1, 2, 3] + empty_table_schema = {"name": "empty_gen", 'columns': {}, 'resource': 'empty_gen', 'write_disposition': 'append'} + + empty = DltResource.from_data(empty_gen) + + empty_r = empty() + # check defaults + assert empty_r.name == empty.name == empty_r.table_name == empty.table_name == "empty_gen" + assert empty_r._table_schema_template is None + assert empty_r.compute_table_schema() == empty_table_schema + assert empty_r.write_disposition == "append" + + empty_r.apply_hints(write_disposition="replace") + assert empty_r.write_disposition == "replace" + empty_r.write_disposition = "merge" + assert empty_r.compute_table_schema()["write_disposition"] == "merge" + # delete hint + empty_r.apply_hints(write_disposition="") + empty_r.write_disposition = "append" + assert empty_r.compute_table_schema()["write_disposition"] == "append" + + empty_r.apply_hints(table_name="table", parent_table_name="parent", primary_key=["a", "b"], merge_key=["c", "a"]) + table = empty_r.compute_table_schema() + assert table["columns"]["a"] == {'merge_key': True, 'name': 'a', 'nullable': False, 'primary_key': True} + assert table["columns"]["b"] == {'name': 'b', 'nullable': False, 'primary_key': True} + assert table["columns"]["c"] == {'merge_key': True, 'name': 'c', 'nullable': False} + assert table["name"] == "table" + assert table["parent"] == "parent" + assert empty_r.table_name == "table" + + # reset + empty_r.apply_hints(table_name="", parent_table_name="", primary_key=[], merge_key="", columns={}) + assert empty_r._table_schema_template == {'columns': {}, 'incremental': None, 'validator': None, 'write_disposition': 'append'} + table = empty_r.compute_table_schema() + assert table["name"] == "empty_gen" + assert "parent" not in table + assert table["columns"] == 
{} + assert empty_r.compute_table_schema() == empty_table_schema + + # combine columns with primary key + empty_r = empty() + empty_r.apply_hints(columns={"tags": {"data_type": "complex", "primary_key": False}}, primary_key="tags", merge_key="tags") + # primary key not set here + assert empty_r.columns["tags"] == {"data_type": "complex", "name": "tags", "primary_key": False} + # only in the computed table + assert empty_r.compute_table_schema()["columns"]["tags"] == {"data_type": "complex", "name": "tags", "primary_key": True, "merge_key": True} + + +def test_apply_dynamic_hints() -> None: + def empty_gen(): + yield [1, 2, 3] + + empty = DltResource.from_data(empty_gen) + + empty_r = empty() + with pytest.raises(InconsistentTableTemplate): + empty_r.apply_hints(parent_table_name=lambda ev: ev["p"]) + + empty_r.apply_hints(table_name=lambda ev: ev["t"], parent_table_name=lambda ev: ev["p"]) + assert empty_r._table_name_hint_fun is not None + assert empty_r._table_has_other_dynamic_hints is True + + with pytest.raises(DataItemRequiredForDynamicTableHints): + empty_r.compute_table_schema() + table = empty_r.compute_table_schema({"t": "table", "p": "parent"}) + assert table["name"] == "table" + assert table["parent"] == "parent" + + # try write disposition and primary key + empty_r.apply_hints(primary_key=lambda ev: ev["pk"], write_disposition=lambda ev: ev["wd"]) + table = empty_r.compute_table_schema({"t": "table", "p": "parent", "pk": ["a", "b"], "wd": "skip"}) + assert table["write_disposition"] == "skip" + assert "a" in table["columns"] + + # validate fails + with pytest.raises(DictValidationException): + empty_r.compute_table_schema({"t": "table", "p": "parent", "pk": ["a", "b"], "wd": "x-skip"}) + + # dynamic columns + empty_r.apply_hints(columns=lambda ev: ev["c"]) + table = empty_r.compute_table_schema({"t": "table", "p": "parent", "pk": ["a", "b"], "wd": "skip", "c": [{"name": "tags"}]}) + assert table["columns"]["tags"] == {"name": "tags"} + + +def 
test_selected_pipes_with_duplicates(): + def input_gen(): + yield from [1, 2, 3] + + def tx_step(item): + return item*2 + + input_r = DltResource.from_data(input_gen) + input_r_clone = input_r.with_name("input_gen_2") + + # separate resources have separate pipe instances + source = DltSource("dupes", "module", Schema("dupes"), [input_r, input_r_clone]) + pipes = source.resources.pipes + assert len(pipes) == 2 + assert pipes[0].name == "input_gen" + assert source.resources[pipes[0].name] == source.input_gen + selected_pipes = source.resources.selected_pipes + assert len(selected_pipes) == 2 + assert selected_pipes[0].name == "input_gen" + assert list(source) == [1, 2, 3, 1, 2, 3] + + # cloned from fresh resource + source = DltSource("dupes", "module", Schema("dupes"), [DltResource.from_data(input_gen), DltResource.from_data(input_gen).with_name("gen_2")]) + assert list(source) == [1, 2, 3, 1, 2, 3] + + # clone transformer + input_r = DltResource.from_data(input_gen) + input_tx = DltResource.from_data(tx_step, data_from=DltResource.Empty) + source = DltSource("dupes", "module", Schema("dupes"), [input_r, (input_r | input_tx).with_name("tx_clone")]) + pipes = source.resources.pipes + assert len(pipes) == 2 + assert source.resources[pipes[0].name] == source.input_gen + assert source.resources[pipes[1].name] == source.tx_clone + selected_pipes = source.resources.selected_pipes + assert len(selected_pipes) == 2 + assert list(source) == [1, 2, 3, 2, 4, 6] \ No newline at end of file diff --git a/tests/load/pipeline/test_restore_state.py b/tests/load/pipeline/test_restore_state.py index 254365a725..bf503efa90 100644 --- a/tests/load/pipeline/test_restore_state.py +++ b/tests/load/pipeline/test_restore_state.py @@ -251,7 +251,7 @@ def some_data(): # extract by creating ad hoc source in pipeline that keeps state under pipeline name data1 = some_data("state1") - data1._name = "state1_data" + data1._pipe.name = "state1_data" p.extract([data1, some_data("state2")], 
schema=Schema("default")) data_two = source_two("state3") @@ -435,7 +435,7 @@ def some_data(param: str) -> Any: # extract two resources that modify the state data1 = some_data("state1") - data1._name = "state1_data" + data1._pipe.name = "state1_data" p.run([data1, some_data("state2")], schema=Schema("default"), destination=destination_config.destination, staging=destination_config.staging, dataset_name=dataset_name) orig_state = p.state From 1c025d4b57e29d6043682054729349c3c2d6c16c Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Fri, 6 Oct 2023 16:30:38 +0200 Subject: [PATCH 04/15] simplifies and documents with_name method --- dlt/extract/source.py | 12 +++-- docs/website/docs/general-usage/resource.md | 46 ++++++++++++++-- docs/website/docs/general-usage/source.md | 21 ++++++-- tests/extract/test_extract.py | 4 +- tests/extract/test_incremental.py | 1 - tests/extract/test_sources.py | 60 +++++++++++++++++++-- 6 files changed, 124 insertions(+), 20 deletions(-) diff --git a/dlt/extract/source.py b/dlt/extract/source.py index a08c584598..8a6c5d9420 100644 --- a/dlt/extract/source.py +++ b/dlt/extract/source.py @@ -553,7 +553,7 @@ def select(self, *resource_names: str) -> Dict[str, DltResource]: self[resource.name].selected = resource.name in resource_names return self.selected - def add(self, resources: Sequence[DltResource]) -> None: + def add(self, *resources: DltResource) -> None: try: # temporarily block cloning when single resource is added self._suppress_clone_on_setitem = True @@ -629,7 +629,7 @@ def __init__(self, name: str, section: str, schema: Schema, resources: Sequence[ warnings.warn(f"Schema name {schema.name} differs from source name {name}! 
The explicit source name argument is deprecated and will be soon removed.") if resources: - self.resources.add(resources) + self.resources.add(*resources) @classmethod def from_data(cls, name: str, section: str, schema: Schema, data: Any) -> "DltSource": @@ -803,12 +803,14 @@ def _get_config_section_context(self) -> ConfigSectionContext: ) def __getattr__(self, resource_name: str) -> DltResource: - return self._resources[resource_name] + try: + return self._resources[resource_name] + except KeyError: + raise AttributeError(f"Resource with name {resource_name} not found in source {self.name}") def __setattr__(self, name: str, value: Any) -> None: if isinstance(value, DltResource): - # TODO: refactor adding resources. 1. resource dict should be read only 2. we should correct the parent pipes after cloning 3. allow replacing existing resources - self._add_resource(name, value) + self.resources[name] = value else: super().__setattr__(name, value) diff --git a/docs/website/docs/general-usage/resource.md b/docs/website/docs/general-usage/resource.md index 305eb29da7..2d45efae52 100644 --- a/docs/website/docs/general-usage/resource.md +++ b/docs/website/docs/general-usage/resource.md @@ -177,7 +177,7 @@ for row in generate_rows(20): You can mark some resource arguments as configuration and [credentials](credentials.md) values so `dlt` can pass them automatically to your functions. -### Feeding data from one resource into another +### Process resources with `dlt.transformer` You can feed data from a resource into another one. The most common case is when you have an API that returns a list of objects (i.e. users) in one endpoint and user details in another. You can deal @@ -201,7 +201,10 @@ def users_details(user_item): # just load the user_details. # dlt figures out dependencies for you. pipeline.run(user_details) - +``` +In the example above, `user_details` will receive data from default instance of `users` resource (with `limit` set to `None`). 
You can also use +**pipe |** operator to bind resources dynamically +```python # you can be more explicit and use a pipe operator. # with it you can create dynamic pipelines where the dependencies # are set at run time and resources are parametrized i.e. @@ -274,7 +277,7 @@ assert list(r) == list(range(10)) > 💡 You cannot limit transformers. They should process all the data they receive fully to avoid > inconsistencies in generated datasets. -### Adjust schema +### Set table and adjust schema You can change the schema of a resource, be it standalone or as a part of a source. Look for method named `apply_hints` which takes the same arguments as resource decorator. Obviously you should call @@ -300,15 +303,44 @@ tables = sql_database() tables.users.table_name = "other_users" ``` +### Duplicate and rename resources +There are cases when your resources are generic (ie. bucket filesystem) and you want to load several instances of it (ie. files from different folders) to separate tables. In the example below we use `filesystem` resource to load csvs from two different folders into separate tables: +```python +@dlt.resource +def filesystem(bucket_url): + # list and yield files in bucket_url + ... + +@dlt.transformer +def csv_reader(file_item): + # load csv, parse and yield rows in file_item + ... + +# create two extract pipes that list files from the bucket and send them to the reader. +# by default both pipes will load data to the same table (csv_reader) +reports_pipe = filesystem("s3://my-bucket/reports") | csv_reader() +transactions_pipe = filesystem("s3://my-bucket/transactions") | csv_reader() + +# so we rename resources to load to "reports" and "transactions" tables +pipeline.run( + [reports_pipe.with_name("reports"), transactions_pipe.with_name("transactions")] +) +``` + +`with_name` method returns a deep copy of the original resource, its data pipe and the data pipes of its parent resources.
A renamed clone is fully separated from the original resource (and other clones) when loading: + it maintains a separate [resource state](state.md#read-and-write-pipeline-state-in-a-resource) and will load data to a table named after the new resource name by default. + ## Load resources You can pass individual resources or list of resources to the `dlt.pipeline` object. The resources loaded outside the source context, will be added to the [default schema](schema.md) of the pipeline. -Example using the `generate_rows` resource above: - ```python +@dlt.resource(name='table_name', write_disposition='replace') +def generate_rows(nr): + for i in range(nr): + yield {'id':i, 'example_string':'abc'} pipeline = dlt.pipeline( pipeline_name="rows_pipeline", @@ -321,6 +353,10 @@ pipeline.run(generate_rows(10)) pipeline.run([generate_rows(10), generate_rows(20)]) ``` +### Resource state +[Resource state](state.md#read-and-write-pipeline-state-in-a-resource) is kept separately for each resource name, so a clone renamed with `with_name` does not share state with the original resource. + + ### Do a full refresh To do a full refresh of an `append` or `merge` resources you temporarily change the write diff --git a/docs/website/docs/general-usage/source.md b/docs/website/docs/general-usage/source.md index 3b7ca266e1..1f520336df 100644 --- a/docs/website/docs/general-usage/source.md +++ b/docs/website/docs/general-usage/source.md @@ -112,27 +112,38 @@ Find more on sampling data [here](resource.md#sample-from-large-data). You can add a custom resource to source after it was created. Imagine that you want to score all the deals with a keras model that will tell you if the deal is a fraud or not. In order to do that you declare a new -[resource that takes the data from](resource.md#feeding-data-from-one-resource-into-another) `deals` +[transformer that takes the data from](resource.md#process-resources-with-dlttransformer) `deals` resource and add it to the source.
```python import dlt from hubspot import hubspot +# source contains `deals` resource source = hubspot() -@dlt.transformer(data_from=source.deals) +@dlt.transformer def deal_scores(deal_item): # obtain the score, deal_items contains data yielded by source.deals score = model.predict(featurize(deal_item)) yield {"deal_id": deal_item, "score": score} -# add the deal_scores to the source -source.deal_scores = deal_scores -source.resources["deal_scores"] = deal_scores # this also works +# connect the data from `deals` resource into `deal_scores` and add to the source +source.resources.add(source.deals | deal_scores) # load the data: you'll see the new table `deal_scores` in your destination! pipeline.run(source) ``` +You can also set the resources in the source as follows +```python +source.deal_scores = source.deals | deal_scores +``` +or +```python +source.resources["deal_scores"] = source.deals | deal_scores +``` +:::note +When adding resource to the source, `dlt` clones the resource so your existing instance is not affected. 
+::: ### Reduce the nesting level of generated tables diff --git a/tests/extract/test_extract.py b/tests/extract/test_extract.py index 116d7bd0ba..c487d19aa1 100644 --- a/tests/extract/test_extract.py +++ b/tests/extract/test_extract.py @@ -87,7 +87,9 @@ def input_gen(): storage = ExtractorStorage(NormalizeStorageConfiguration()) extract_id = storage.create_extract_id() schema_update = extract(extract_id, source, storage) - print(schema_update) + # both tables got generated + assert "input_gen" in schema_update + assert "gen_clone" in schema_update def test_extract_renamed_clone_and_parent(): diff --git a/tests/extract/test_incremental.py b/tests/extract/test_incremental.py index 5c07afb2f5..146ca954bf 100644 --- a/tests/extract/test_incremental.py +++ b/tests/extract/test_incremental.py @@ -404,7 +404,6 @@ def some_data(last_timestamp=dlt.sources.incremental("item.timestamp")): @dlt.resource def standalone_some_data(now=None, last_timestamp=dlt.sources.incremental("item.timestamp")): for i in range(-10, 10): - print(i) yield {"delta": i, "item": {"timestamp": (now or pendulum.now()).add(days=i).timestamp()}} diff --git a/tests/extract/test_sources.py b/tests/extract/test_sources.py index 93afce8f80..506cefa355 100644 --- a/tests/extract/test_sources.py +++ b/tests/extract/test_sources.py @@ -705,7 +705,7 @@ def test_source(no_resources): s = test_source(10) assert s.resource_1.name == s.resources["resource_1"].name assert id(s.resource_1) == id(s.resources["resource_1"]) - with pytest.raises(KeyError): + with pytest.raises(AttributeError): s.resource_30 @@ -859,7 +859,7 @@ def tx_step(item): # add all together res_dict = DltResourceDict("source", "section") - res_dict.add([input_r , input_r | input_tx]) + res_dict.add(input_r , input_r | input_tx) assert res_dict._new_pipes == [] assert res_dict._suppress_clone_on_setitem is False assert res_dict["input_gen"]._pipe is res_dict["tx_step"]._pipe.parent @@ -892,7 +892,61 @@ def tx_step(item): # can't add resource 
with same name again with pytest.raises(InvalidResourceDataTypeMultiplePipes): - res_dict.add([input_r]) + res_dict.add(input_r) + + +@pytest.mark.parametrize("add_mode", ("add", "dict", "set")) +def test_add_transformer_to_source(add_mode: str) -> None: + @dlt.resource(name="numbers") + def number_gen(init): + yield from range(init, init + 5) + + + @dlt.source + def number_source(): + return number_gen + + source = number_source() + + @dlt.transformer + def multiplier(item): + return item*2 + + mul_pipe = source.numbers | multiplier() + + if add_mode == "add": + source.resources.add(mul_pipe) + elif add_mode == "dict": + source.resources["multiplier"] = mul_pipe + else: + source.multiplier = mul_pipe + + # need to bind numbers + with pytest.raises(ParametrizedResourceUnbound): + list(source) + + source.numbers.bind(10) + # both numbers and multiplier are evaluated, numbers only once + assert list(source) == [20, 10, 22, 11, 24, 12, 26, 13, 28, 14] + + +def test_unknown_resource_access() -> None: + @dlt.resource(name="numbers") + def number_gen(init): + yield from range(init, init + 5) + + + @dlt.source + def number_source(): + return number_gen + + source = number_source() + + with pytest.raises(AttributeError): + source.unknown + + with pytest.raises(KeyError): + source.resources["unknown"] def test_source_multiple_iterations() -> None: From 3816ed7529bc3df4b8b77e4663f69ebba23e16b5 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Fri, 6 Oct 2023 23:50:00 +0200 Subject: [PATCH 05/15] does not clone parent pipes on call --- dlt/extract/source.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/dlt/extract/source.py b/dlt/extract/source.py index 8a6c5d9420..6cb2f7ede1 100644 --- a/dlt/extract/source.py +++ b/dlt/extract/source.py @@ -17,7 +17,8 @@ from dlt.common.pipeline import PipelineContext, StateInjectableContext, SupportsPipelineRun, resource_state, source_state, pipeline_state from dlt.common.utils import 
graph_find_scc_nodes, flatten_list_or_items, get_callable_name, graph_edges_to_nodes, multi_context_manager, uniq_id -from dlt.extract.typing import DataItemWithMeta, ItemTransformFunc, ItemTransformFunctionWithMeta, TDecompositionStrategy, TableNameMeta, FilterItem, MapItem, YieldMapItem, ValidateItem +from dlt.extract.typing import (DataItemWithMeta, ItemTransformFunc, ItemTransformFunctionWithMeta, TDecompositionStrategy, TableNameMeta, + FilterItem, MapItem, YieldMapItem, ValidateItem) from dlt.extract.pipe import Pipe, ManagedPipeIterator, TPipeStep from dlt.extract.schema import DltResourceSchema, TTableSchemaTemplate from dlt.extract.incremental import Incremental, IncrementalResourceWrapper @@ -44,12 +45,13 @@ def __init__( table_schema_template: TTableSchemaTemplate, selected: bool, incremental: IncrementalResourceWrapper = None, - section: str = None + section: str = None, + bound: bool = False ) -> None: self.section = section self.selected = selected self._pipe = pipe - self._bound = False + self._bound = bound if incremental and not self.incremental: self.add_step(incremental) self.source_name = None @@ -101,7 +103,7 @@ def from_data( # create resource from iterator, iterable or generator function if isinstance(data, (Iterable, Iterator)) or callable(data): pipe = Pipe.from_data(name, data, parent=parent_pipe) - return cls(pipe, table_schema_template, selected, incremental=incremental, section=section) + return cls(pipe, table_schema_template, selected, incremental=incremental, section=section, bound=not callable(data)) else: # some other data type that is not supported raise InvalidResourceDataType(name, data, type(data), f"The data type is {type(data).__name__}") @@ -113,7 +115,7 @@ def name(self) -> str: def with_name(self, new_name: str) -> "DltResource": """Clones the resource with a new name. 
Such resource keeps separate state and loads data to `new_name` table by default.""" - return self._clone(new_name=new_name) + return self._clone(new_name=new_name, with_parent=True) @property def is_transformer(self) -> bool: @@ -327,12 +329,12 @@ def state(self) -> StrAny: with inject_section(self._get_config_section_context()): return resource_state(self.name) - def _clone(self, new_name: str = None) -> "DltResource": + def _clone(self, new_name: str = None, with_parent: bool = False) -> "DltResource": """Creates a deep copy of a current resource, optionally renaming the resource. The clone will not be part of the source """ pipe = self._pipe if self._pipe and not self._pipe.is_empty: - pipe = pipe._clone(new_name=new_name, with_parent=True) + pipe = pipe._clone(new_name=new_name, with_parent=with_parent) # incremental and parent are already in the pipe (if any) return DltResource( pipe, @@ -471,7 +473,7 @@ def validate_transformer_generator_function(f: AnyFun) -> int: # produce Empty resource singleton DltResource.Empty = DltResource(Pipe(None), None, False) -TUnboundDltResource = Callable[[], DltResource] +TUnboundDltResource = Callable[..., DltResource] class DltResourceDict(Dict[str, DltResource]): From bed41416a4a560341bcaed753de019d5d7a7bdfa Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Fri, 6 Oct 2023 23:50:18 +0200 Subject: [PATCH 06/15] adds explicit standalone resources --- dlt/extract/decorators.py | 31 +++++++++++- docs/website/docs/general-usage/resource.md | 18 ++++++- tests/extract/test_decorators.py | 50 ++++++++++++++++++- tests/extract/test_sources.py | 55 +++++++++++++++++++++ 4 files changed, 149 insertions(+), 5 deletions(-) diff --git a/dlt/extract/decorators.py b/dlt/extract/decorators.py index f9f62a23ff..d38c7125b2 100644 --- a/dlt/extract/decorators.py +++ b/dlt/extract/decorators.py @@ -2,7 +2,7 @@ import inspect from types import ModuleType from functools import wraps -from typing import TYPE_CHECKING, Any, Callable, ClassVar, 
Iterator, List, Optional, Tuple, Type, TypeVar, Union, cast, overload +from typing import TYPE_CHECKING, Any, Callable, ClassVar, Iterator, List, Literal, Optional, Tuple, Type, TypeVar, Union, cast, overload from dlt.common.configuration import with_config, get_fun_spec, known_sections, configspec from dlt.common.configuration.container import Container @@ -220,6 +220,23 @@ def resource( ) -> Callable[[Callable[TResourceFunParams, Any]], DltResource]: ... +@overload +def resource( + data: None = ..., + /, + name: str = None, + table_name: TTableHintTemplate[str] = None, + write_disposition: TTableHintTemplate[TWriteDisposition] = None, + columns: TTableHintTemplate[TAnySchemaColumns] = None, + primary_key: TTableHintTemplate[TColumnNames] = None, + merge_key: TTableHintTemplate[TColumnNames] = None, + selected: bool = True, + spec: Type[BaseConfiguration] = None, + standalone: Literal[True] = True +) -> Callable[[Callable[TResourceFunParams, Any]], Callable[TResourceFunParams, DltResource]]: + ... + + @overload def resource( data: Union[List[Any], Tuple[Any], Iterator[Any]], @@ -247,6 +264,7 @@ def resource( merge_key: TTableHintTemplate[TColumnNames] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, + standalone: bool = False, data_from: TUnboundDltResource = None, ) -> Any: """When used as a decorator, transforms any generator (yielding) function into a `dlt resource`. When used as a function, it transforms data in `data` argument into a `dlt resource`. @@ -297,6 +315,8 @@ def resource( spec (Type[BaseConfiguration], optional): A specification of configuration and secret values required by the source. + standalone (bool, optional): Returns a wrapped decorated function that creates DltResource instance. Must be called before use. Cannot be part of a source. + data_from (TUnboundDltResource, optional): Allows to pipe data from one resource to another to build multi-step pipelines. 
Raises: @@ -355,7 +375,14 @@ def decorator(f: Callable[TResourceFunParams, Any]) -> Callable[TResourceFunPara if not is_inner_resource: _SOURCES[f.__qualname__] = SourceInfo(SPEC, f, func_module) - return make_resource(resource_name, source_section, conf_f, incremental) + @wraps(conf_f) + def _wrap(*args: Any, **kwargs: Any) -> DltResource: + return make_resource(resource_name, source_section, conf_f(*args, **kwargs), incremental) + + if standalone: + return _wrap + else: + return make_resource(resource_name, source_section, conf_f, incremental) # if data is callable or none use decorator if data is None: diff --git a/docs/website/docs/general-usage/resource.md b/docs/website/docs/general-usage/resource.md index 2d45efae52..a22141046a 100644 --- a/docs/website/docs/general-usage/resource.md +++ b/docs/website/docs/general-usage/resource.md @@ -212,6 +212,20 @@ In the example above, `user_details` will receive data from default instance of pipeline.run(users(limit=100) | user_details) ``` +### Declare a standalone resource +A standalone resource is defined on a function that is top level in a module (not an inner function) that accepts config and secrets values. Additionally, +if the `standalone` flag is specified, the decorated function signature and docstring will be preserved. `dlt.resource` will just wrap the +decorated function and the user must call the wrapper to get the actual resource. Below we declare a `filesystem` resource that must be called before use. +```python +@dlt.resource(standalone=True) +def filesystem(bucket_url=dlt.config.value): + """list and yield files in `bucket_url`""" + ...
+ +# `filesystem` must be called before it is extracted or used in any other way +pipeline.run(filesystem("s3://my-bucket/reports"), table_name="reports") +``` + ## Customize resources ### Filter, transform and pivot data @@ -306,8 +320,8 @@ tables.users.table_name = "other_users" ### Duplicate and rename resources There are cases when you your resources are generic (ie. bucket filesystem) and you want to load several instances of it (ie. files from different folders) to separate tables. In example below we use `filesystem` source to load csvs from two different folders into separate tables: ```python -@dlt.resource -def filesystem(bucket_url) +@dlt.resource(standalone=True) +def filesystem(bucket_url): # list and yield files in bucket_url ... diff --git a/tests/extract/test_decorators.py b/tests/extract/test_decorators.py index aa3cd27619..9c065b2dc5 100644 --- a/tests/extract/test_decorators.py +++ b/tests/extract/test_decorators.py @@ -7,6 +7,7 @@ import dlt from dlt.common.configuration import known_sections from dlt.common.configuration.container import Container +from dlt.common.configuration.exceptions import ConfigFieldMissingException from dlt.common.configuration.inject import get_fun_spec from dlt.common.configuration.resolve import inject_section from dlt.common.configuration.specs.config_section_context import ConfigSectionContext @@ -18,7 +19,7 @@ from dlt.common.schema.typing import TTableSchemaColumns from dlt.cli.source_detection import detect_source_configs -from dlt.extract.exceptions import DataItemRequiredForDynamicTableHints, ExplicitSourceNameInvalid, InconsistentTableTemplate, InvalidResourceDataTypeFunctionNotAGenerator, InvalidResourceDataTypeIsNone, ParametrizedResourceUnbound, PipeNotBoundToData, ResourceFunctionExpected, ResourceInnerCallableConfigWrapDisallowed, SourceDataIsNone, SourceIsAClassTypeError, SourceNotAFunction, SourceSchemaNotAvailable +from dlt.extract.exceptions import DataItemRequiredForDynamicTableHints, 
ExplicitSourceNameInvalid, InconsistentTableTemplate, InvalidResourceDataTypeFunctionNotAGenerator, InvalidResourceDataTypeIsNone, InvalidResourceDataTypeMultiplePipes, ParametrizedResourceUnbound, PipeGenInvalid, PipeNotBoundToData, ResourceFunctionExpected, ResourceInnerCallableConfigWrapDisallowed, SourceDataIsNone, SourceIsAClassTypeError, SourceNotAFunction, SourceSchemaNotAvailable from dlt.extract.source import DltResource, DltSource from dlt.common.schema.exceptions import InvalidSchemaName @@ -586,6 +587,53 @@ def invalid_disposition(): assert "write_disposition" in str(py_ex.value) +# wrapped flag will not create the resource but just simple function wrapper that must be called before use +@dlt.resource(standalone=True) +def standalone_signature(init: int, secret_end: int = dlt.secrets.value): + """Has fine docstring""" + yield from range(init, secret_end) + + +def test_standalone_resource() -> None: + + # wrapped flag will not create the resource but just simple function wrapper that must be called before use + @dlt.resource(standalone=True) + def nice_signature(init: int): + """Has nice signature""" + yield from range(init, 10) + + assert not isinstance(nice_signature, DltResource) + assert callable(nice_signature) + assert nice_signature.__doc__ == """Has nice signature""" + + assert list(nice_signature(7)) == [7, 8, 9] + + # can't work in a source + + @dlt.source + def nice_source(): + return nice_signature + + source = nice_source() + source.nice_signature.bind(7) + with pytest.raises(PipeGenInvalid): + assert list(source) == [7, 8, 9] + + @dlt.source + def many_instances(): + return nice_signature(9), nice_signature(7) + + with pytest.raises(InvalidResourceDataTypeMultiplePipes): + source = many_instances() + + with pytest.raises(ConfigFieldMissingException): + list(standalone_signature(1)) + + # make sure that config sections work + os.environ["SOURCES__TEST_DECORATORS__STANDALONE_SIGNATURE__SECRET_END"] = "5" + assert list(standalone_signature(1)) 
== [1, 2, 3, 4] + + def test_class_source() -> None: class _Source: diff --git a/tests/extract/test_sources.py b/tests/extract/test_sources.py index 506cefa355..7739abeb0f 100644 --- a/tests/extract/test_sources.py +++ b/tests/extract/test_sources.py @@ -660,6 +660,9 @@ def test_illegal_double_bind() -> None: def _r1(): yield ["a", "b", "c"] + assert _r1._bound is False + assert _r1()._bound is True + with pytest.raises(TypeError) as py_ex: _r1()() assert "Bound DltResource" in str(py_ex.value) @@ -668,6 +671,15 @@ def _r1(): _r1.bind().bind() assert "Bound DltResource" in str(py_ex.value) + bound_r = dlt.resource([1, 2, 3], name="rx") + assert bound_r._bound is True + with pytest.raises(TypeError): + _r1() + + def _gen(): + yield from [1, 2, 3] + + assert dlt.resource(_gen())._bound is True @dlt.resource @@ -949,6 +961,49 @@ def number_source(): source.resources["unknown"] +def test_clone_resource_on_call(): + @dlt.resource(name="gene") + def number_gen(init): + yield from range(init, init + 5) + + @dlt.transformer() + def multiplier(number, mul): + return number * mul + + gene_clone = number_gen(10) + assert gene_clone is not number_gen + assert gene_clone._pipe is not number_gen._pipe + assert gene_clone.name == number_gen.name + + pipe = number_gen | multiplier + pipe_clone = pipe(4) + assert pipe_clone._pipe is not pipe._pipe + assert pipe._pipe is multiplier._pipe + # but parents are the same + assert pipe_clone._pipe.parent is number_gen._pipe + with pytest.raises(ParametrizedResourceUnbound): + list(pipe_clone) + # bind the original directly via pipe + pipe_clone._pipe.parent.bind_gen(10) + assert list(pipe_clone) == [40, 44, 48, 52, 56] + + +def test_clone_resource_on_bind(): + @dlt.resource(name="gene") + def number_gen(): + yield from range(1, 5) + + @dlt.transformer + def multiplier(number, mul): + return number * mul + + pipe = number_gen | multiplier + bound_pipe = pipe.bind(3) + assert bound_pipe is pipe is multiplier + assert bound_pipe._pipe is 
pipe._pipe + assert bound_pipe._pipe.parent is pipe._pipe.parent + + def test_source_multiple_iterations() -> None: def some_data(): From 64f5e1caf2e1ef161fe51955a7edf1bcf027cc1e Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sun, 8 Oct 2023 01:58:32 +0200 Subject: [PATCH 07/15] fixes finding classes in union to sub and superclass of cls --- dlt/common/typing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dlt/common/typing.py b/dlt/common/typing.py index 0214b4f5f8..814648e0a8 100644 --- a/dlt/common/typing.py +++ b/dlt/common/typing.py @@ -131,7 +131,8 @@ def extract_inner_type(hint: Type[Any], preserve_new_types: bool = False) -> Typ def get_all_types_of_class_in_union(hint: Type[Any], cls: Type[TAny]) -> List[Type[TAny]]: - return [t for t in get_args(hint) if inspect.isclass(t) and issubclass(t, cls)] + # hint is an Union that contains classes, return all classes that are a subclass or superclass of cls + return [t for t in get_args(hint) if inspect.isclass(t) and (issubclass(t, cls) or issubclass(cls, t))] def get_generic_type_argument_from_instance(instance: Any, sample_value: Optional[Any]) -> Type[Any]: From 485c3c1be3ab69a54a99b92f44b27255006163b2 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sun, 8 Oct 2023 01:58:59 +0200 Subject: [PATCH 08/15] bumps to 0.3.19a0 --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e4b7bb696e..4c00cfe55d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "dlt" -version = "0.3.18" -description = "DLT is an open-source python-native scalable data loading framework that does not require any devops efforts to run." +version = "0.3.19a0" +description = "dlt is an open-source python-native scalable data loading library that does not require any devops efforts to run." authors = ["dltHub Inc. 
"] maintainers = [ "Marcin Rudolf ", "Adrian Brudaru ", "Ty Dunn "] readme = "README.md" @@ -9,7 +9,7 @@ license = "Apache-2.0" homepage = "https://github.com/dlt-hub" repository = "https://github.com/dlt-hub/dlt" classifiers = [ - "Development Status :: 3 - Alpha", + "Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Topic :: Software Development :: Libraries", From 44ee3d4f3860458c34f1110186e19f209b6c7d9a Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sun, 8 Oct 2023 01:59:48 +0200 Subject: [PATCH 09/15] implements standalone transformers, cleanup of dlt.sources module --- dlt/extract/decorators.py | 47 +++++++++++++++++--- dlt/extract/pipe.py | 56 ++++++------------------ dlt/extract/source.py | 74 ++++++++++++++++++++------------ dlt/extract/utils.py | 62 +++++++++++++++++++++++++- dlt/sources/__init__.py | 7 ++- dlt/sources/config.py | 2 + dlt/sources/credentials.py | 3 +- tests/extract/test_decorators.py | 59 +++++++++++++++++++++++++ tests/extract/test_sources.py | 12 +++--- tests/pipeline/test_pipeline.py | 38 ++++++++++++++++ 10 files changed, 272 insertions(+), 88 deletions(-) create mode 100644 dlt/sources/config.py diff --git a/dlt/extract/decorators.py b/dlt/extract/decorators.py index d38c7125b2..d65b21ca85 100644 --- a/dlt/extract/decorators.py +++ b/dlt/extract/decorators.py @@ -1,5 +1,6 @@ import os import inspect +import makefun from types import ModuleType from functools import wraps from typing import TYPE_CHECKING, Any, Callable, ClassVar, Iterator, List, Literal, Optional, Tuple, Type, TypeVar, Union, cast, overload @@ -15,7 +16,7 @@ from dlt.common.source import _SOURCES, SourceInfo from dlt.common.schema.schema import Schema from dlt.common.schema.typing import TColumnNames, TTableSchemaColumns, TWriteDisposition, TAnySchemaColumns -from dlt.extract.utils import ensure_table_schema_columns_hint +from dlt.extract.utils import ensure_table_schema_columns_hint,
simulate_func_call, wrap_compat_transformer, wrap_resource_gen from dlt.common.storages.exceptions import SchemaNotFoundError from dlt.common.storages.schema_storage import SchemaStorage from dlt.common.typing import AnyFun, ParamSpec, Concatenate, TDataItem, TDataItems @@ -361,9 +362,10 @@ def decorator(f: Callable[TResourceFunParams, Any]) -> Callable[TResourceFunPara resource_sections = (known_sections.SOURCES, source_section, resource_name) # standalone resource will prefer existing section context when resolving config values # this lets the source to override those values and provide common section for all config values for resources present in that source + # for autogenerated spec do not include defaults conf_f = with_config( incr_f, - spec=spec, sections=resource_sections, sections_merge_style=ConfigSectionContext.resource_merge_style, include_defaults=False + spec=spec, sections=resource_sections, sections_merge_style=ConfigSectionContext.resource_merge_style, include_defaults=spec is not None ) is_inner_resource = is_inner_callable(f) if conf_f != incr_f and is_inner_resource: @@ -375,11 +377,21 @@ def decorator(f: Callable[TResourceFunParams, Any]) -> Callable[TResourceFunPara if not is_inner_resource: _SOURCES[f.__qualname__] = SourceInfo(SPEC, f, func_module) - @wraps(conf_f) - def _wrap(*args: Any, **kwargs: Any) -> DltResource: - return make_resource(resource_name, source_section, conf_f(*args, **kwargs), incremental) - if standalone: + if data_from: + compat_wrapper, skip_args = wrap_compat_transformer, 1 + else: + compat_wrapper, skip_args = wrap_resource_gen, 0 + + @wraps(conf_f) + def _wrap(*args: Any, **kwargs: Any) -> DltResource: + sig = simulate_func_call(conf_f, skip_args, *args, **kwargs) + r = make_resource(resource_name, source_section, compat_wrapper(resource_name, conf_f, sig, *args, **kwargs), incremental) + # consider transformer arguments bound + r._args_bound = True + # keep explicit args passed + r._set_explicit_args(conf_f, sig, 
*args, **kwargs) + return r return _wrap else: return make_resource(resource_name, source_section, conf_f, incremental) @@ -418,6 +430,23 @@ def transformer( ) -> Callable[[Callable[Concatenate[TDataItem, TResourceFunParams], Any]], DltResource]: ... +@overload +def transformer( + f: None = ..., + /, + data_from: TUnboundDltResource = DltResource.Empty, + name: str = None, + table_name: TTableHintTemplate[str] = None, + write_disposition: TTableHintTemplate[TWriteDisposition] = None, + columns: TTableHintTemplate[TAnySchemaColumns] = None, + primary_key: TTableHintTemplate[TColumnNames] = None, + merge_key: TTableHintTemplate[TColumnNames] = None, + selected: bool = True, + spec: Type[BaseConfiguration] = None, + standalone: Literal[True] = True +) -> Callable[[Callable[Concatenate[TDataItem, TResourceFunParams], Any]], Callable[TResourceFunParams, DltResource]]: + ... + @overload def transformer( f: Callable[Concatenate[TDataItem, TResourceFunParams], Any], @@ -445,7 +474,8 @@ def transformer( primary_key: TTableHintTemplate[TColumnNames] = None, merge_key: TTableHintTemplate[TColumnNames] = None, selected: bool = True, - spec: Type[BaseConfiguration] = None + spec: Type[BaseConfiguration] = None, + standalone: bool = False ) -> Callable[[Callable[Concatenate[TDataItem, TResourceFunParams], Any]], DltResource]: """A form of `dlt resource` that takes input from other resources via `data_from` argument in order to enrich or transform the data. @@ -498,6 +528,8 @@ def transformer( selected (bool, optional): When `True` `dlt pipeline` will extract and load this resource, if `False`, the resource will be ignored. spec (Type[BaseConfiguration], optional): A specification of configuration and secret values required by the source. + + standalone (bool, optional): Returns a wrapped decorated function that creates DltResource instance. Must be called before use. Cannot be part of a source. 
""" if isinstance(f, DltResource): raise ValueError("Please pass `data_from=` argument as keyword argument. The only positional argument to transformer is the decorated function") @@ -512,6 +544,7 @@ def transformer( merge_key=merge_key, selected=selected, spec=spec, + standalone=standalone, data_from=data_from ) diff --git a/dlt/extract/pipe.py b/dlt/extract/pipe.py index c24d7433c1..d19a0f9234 100644 --- a/dlt/extract/pipe.py +++ b/dlt/extract/pipe.py @@ -6,7 +6,7 @@ from concurrent.futures import ThreadPoolExecutor from copy import copy from threading import Thread -from typing import Any, ContextManager, Dict, Optional, Sequence, Union, Callable, Iterable, Iterator, List, NamedTuple, Awaitable, Tuple, Type, TYPE_CHECKING, Literal +from typing import Any, Dict, Optional, Sequence, Union, Callable, Iterable, Iterator, List, NamedTuple, Awaitable, Tuple, Type, TYPE_CHECKING, Literal from dlt.common import sleep from dlt.common.configuration import configspec @@ -18,8 +18,11 @@ from dlt.common.typing import AnyFun, AnyType, TDataItems from dlt.common.utils import get_callable_name -from dlt.extract.exceptions import CreatePipeException, DltSourceException, ExtractorException, InvalidResourceDataTypeFunctionNotAGenerator, InvalidStepFunctionArguments, InvalidTransformerGeneratorFunction, ParametrizedResourceUnbound, PipeException, PipeGenInvalid, PipeItemProcessingError, PipeNotBoundToData, ResourceExtractionError +from dlt.extract.exceptions import (CreatePipeException, DltSourceException, ExtractorException, InvalidStepFunctionArguments, + InvalidResourceDataTypeFunctionNotAGenerator, InvalidTransformerGeneratorFunction, ParametrizedResourceUnbound, + PipeException, PipeGenInvalid, PipeItemProcessingError, PipeNotBoundToData, ResourceExtractionError) from dlt.extract.typing import DataItemWithMeta, ItemTransform, SupportsPipe, TPipedDataItems +from dlt.extract.utils import simulate_func_call, wrap_compat_transformer, wrap_resource_gen if TYPE_CHECKING: TItemFuture 
= Future[Union[TDataItems, DataItemWithMeta]] @@ -291,52 +294,17 @@ def _wrap_gen(self, *args: Any, **kwargs: Any) -> Any: head = self.gen _data: Any = None - if not callable(head): - # just provoke a call to raise default exception - head() # type: ignore - raise AssertionError() - - sig = inspect.signature(head) - # simulate the call to the underlying callable - if args or kwargs: - skip_items_arg = 1 if self.has_parent else 0 # skip the data item argument for transformers - no_item_sig = sig.replace(parameters=list(sig.parameters.values())[skip_items_arg:]) - try: - no_item_sig.bind(*args, **kwargs) - except TypeError as v_ex: - raise TypeError(f"{get_callable_name(head)}(): " + str(v_ex)) + # skip the data item argument for transformers + args_to_skip = 1 if self.has_parent else 0 + # simulate function call + sig = simulate_func_call(head, args_to_skip, *args, **kwargs) + assert callable(head) # create wrappers with partial if self.has_parent: - - if len(sig.parameters) == 2 and "meta" in sig.parameters: - return head - - def _tx_partial(item: TDataItems, meta: Any = None) -> Any: - # print(f"_ITEM:{item}{meta},{args}{kwargs}") - # also provide optional meta so pipe does not need to update arguments - if "meta" in kwargs: - kwargs["meta"] = meta - return head(item, *args, **kwargs) # type: ignore - - # this partial wraps transformer and sets a signature that is compatible with pipe transform calls - _data = makefun.wraps(head, new_sig=inspect.signature(_tx_partial))(_tx_partial) + _data = wrap_compat_transformer(self.name, head, sig, *args, **kwargs) else: - if inspect.isgeneratorfunction(inspect.unwrap(head)) or inspect.isgenerator(head): - # if no arguments then no wrap - if len(sig.parameters) == 0: - return head - - # always wrap generators and generator functions. evaluate only at runtime! 
- - def _partial() -> Any: - # print(f"_PARTIAL: {args} {kwargs} vs {args_}{kwargs_}") - return head(*args, **kwargs) # type: ignore - - # this partial preserves the original signature and just defers the call to pipe - _data = makefun.wraps(head, new_sig=inspect.signature(_partial))(_partial) - else: - raise InvalidResourceDataTypeFunctionNotAGenerator(self.name, head, type(head)) + _data = wrap_resource_gen(self.name, head, sig, *args, **kwargs) return _data def _verify_head_step(self, step: TPipeStep) -> None: diff --git a/dlt/extract/source.py b/dlt/extract/source.py index 6cb2f7ede1..5bca2cc773 100644 --- a/dlt/extract/source.py +++ b/dlt/extract/source.py @@ -12,7 +12,7 @@ from dlt.common.normalizers.json.relational import DataItemNormalizer as RelationalNormalizer, RelationalNormalizerConfigPropagation from dlt.common.schema import Schema from dlt.common.schema.typing import TColumnName -from dlt.common.typing import AnyFun, StrAny, TDataItem, TDataItems, NoneType +from dlt.common.typing import AnyFun, DictStrAny, StrAny, TDataItem, TDataItems, NoneType from dlt.common.configuration.container import Container from dlt.common.pipeline import PipelineContext, StateInjectableContext, SupportsPipelineRun, resource_state, source_state, pipeline_state from dlt.common.utils import graph_find_scc_nodes, flatten_list_or_items, get_callable_name, graph_edges_to_nodes, multi_context_manager, uniq_id @@ -34,10 +34,12 @@ def with_table_name(item: TDataItems, table_name: str) -> DataItemWithMeta: class DltResource(Iterable[TDataItem], DltResourceSchema): - + """Implements dlt resource. 
Contains a data pipe that wraps a generating item and table schema that can be adjusted""" Empty: ClassVar["DltResource"] = None source_name: str """Name of the source that contains this instance of the source, set when added to DltResourcesDict""" + section: str + """A config section name""" def __init__( self, @@ -46,12 +48,13 @@ def __init__( selected: bool, incremental: IncrementalResourceWrapper = None, section: str = None, - bound: bool = False + args_bound: bool = False ) -> None: self.section = section self.selected = selected self._pipe = pipe - self._bound = bound + self._args_bound = args_bound + self._explicit_args: DictStrAny = None if incremental and not self.incremental: self.add_step(incremental) self.source_name = None @@ -103,7 +106,7 @@ def from_data( # create resource from iterator, iterable or generator function if isinstance(data, (Iterable, Iterator)) or callable(data): pipe = Pipe.from_data(name, data, parent=parent_pipe) - return cls(pipe, table_schema_template, selected, incremental=incremental, section=section, bound=not callable(data)) + return cls(pipe, table_schema_template, selected, incremental=incremental, section=section, args_bound=not callable(data)) else: # some other data type that is not supported raise InvalidResourceDataType(name, data, type(data), f"The data type is {type(data).__name__}") @@ -123,8 +126,8 @@ def is_transformer(self) -> bool: return self._pipe.has_parent @property - def requires_binding(self) -> bool: - """Checks if resource has unbound parameters""" + def requires_args(self) -> bool: + """Checks if resource has unbound arguments""" try: self._pipe.ensure_gen_bound() return False @@ -301,8 +304,9 @@ def set_template(self, table_schema_template: TTableSchemaTemplate) -> None: def bind(self, *args: Any, **kwargs: Any) -> "DltResource": """Binds the parametrized resource to passed arguments. Modifies resource pipe in place. 
Does not evaluate generators or iterators.""" - if self._bound: - raise TypeError("Bound DltResource object is not callable") + if self._args_bound: + raise TypeError(f"Parametrized resource {self.name} is not callable") + orig_gen = self._pipe.gen gen = self._pipe.bind_gen(*args, **kwargs) if isinstance(gen, DltResource): # the resource returned resource: update in place @@ -320,33 +324,27 @@ def bind(self, *args: Any, **kwargs: Any) -> "DltResource": # write props from new pipe instance self._pipe.__dict__.update(gen.__dict__) else: - self._bound = True + self._args_bound = True + self._set_explicit_args(orig_gen, None, *args, **kwargs) # type: ignore return self + @property + def explicit_args(self) -> StrAny: + """Returns a dictionary of arguments used to parametrize the resource. Does not include defaults and injected args.""" + if not self._args_bound: + raise TypeError(f"Resource {self.name} is not yet parametrized") + return self._explicit_args + @property def state(self) -> StrAny: """Gets resource-scoped state from the active pipeline. PipelineStateNotAvailable is raised if pipeline context is not available""" with inject_section(self._get_config_section_context()): return resource_state(self.name) - def _clone(self, new_name: str = None, with_parent: bool = False) -> "DltResource": - """Creates a deep copy of a current resource, optionally renaming the resource. The clone will not be part of the source - """ - pipe = self._pipe - if self._pipe and not self._pipe.is_empty: - pipe = pipe._clone(new_name=new_name, with_parent=with_parent) - # incremental and parent are already in the pipe (if any) - return DltResource( - pipe, - deepcopy(self._table_schema_template), - selected=self.selected, - section=self.section - ) - def __call__(self, *args: Any, **kwargs: Any) -> "DltResource": """Binds the parametrized resources to passed arguments. Creates and returns a bound resource. 
Generators and iterators are not evaluated.""" - if self._bound: - raise TypeError("Bound DltResource object is not callable") + if self._args_bound: + raise TypeError(f"Parametrized resource {self.name} is not callable") r = self._clone() return r.bind(*args, **kwargs) @@ -374,6 +372,7 @@ def __iter__(self) -> Iterator[TDataItem]: state, _ = pipeline_state(container, {}) state_context = StateInjectableContext(state=state) section_context = self._get_config_section_context() + print(section_context) # managed pipe iterator will set the context on each call to __next__ with inject_section(section_context), Container().injectable_context(state_context): @@ -383,6 +382,27 @@ def __iter__(self) -> Iterator[TDataItem]: _iter = map(lambda item: item.item, pipe_iterator) return flatten_list_or_items(_iter) + def _set_explicit_args(self, f: AnyFun, sig: inspect.Signature = None, *args: Any, **kwargs: Any) -> None: + try: + sig = sig or inspect.signature(f) + self._explicit_args = sig.bind_partial(*args, **kwargs).arguments + except Exception: + pass + + def _clone(self, new_name: str = None, with_parent: bool = False) -> "DltResource": + """Creates a deep copy of a current resource, optionally renaming the resource. 
The clone will not be part of the source + """ + pipe = self._pipe + if self._pipe and not self._pipe.is_empty: + pipe = pipe._clone(new_name=new_name, with_parent=with_parent) + # incremental and parent are already in the pipe (if any) + return DltResource( + pipe, + deepcopy(self._table_schema_template), + selected=self.selected, + section=self.section + ) + def _get_config_section_context(self) -> ConfigSectionContext: container = Container() proxy = container[PipelineContext] @@ -418,7 +438,7 @@ def __str__(self) -> str: info += f"\nThis resource is a transformer and takes data items from {self._pipe.parent.name}" else: if self._pipe.is_data_bound: - if self.requires_binding: + if self.requires_args: head_sig = inspect.signature(self._pipe.gen) # type: ignore info += f"\nThis resource is parametrized and takes the following arguments {head_sig}. You must call this resource before loading." else: diff --git a/dlt/extract/utils.py b/dlt/extract/utils.py index 04cb41299f..794c606040 100644 --- a/dlt/extract/utils.py +++ b/dlt/extract/utils.py @@ -1,10 +1,15 @@ +import inspect +import makefun from typing import Union, List, Any, Sequence, cast from collections.abc import Mapping as C_Mapping from dlt.common.exceptions import MissingDependencyException -from dlt.extract.typing import TTableHintTemplate, TDataItem, TFunHintTemplate from dlt.common.schema.typing import TColumnNames, TAnySchemaColumns, TTableSchemaColumns -from dlt.common.typing import TDataItem +from dlt.common.typing import AnyFun, TDataItem, TDataItems +from dlt.common.utils import get_callable_name +from dlt.extract.exceptions import InvalidResourceDataTypeFunctionNotAGenerator + +from dlt.extract.typing import TTableHintTemplate, TDataItem, TFunHintTemplate try: from dlt.common.libs import pydantic @@ -55,3 +60,56 @@ def wrapper(item: TDataItem) -> TTableSchemaColumns: return wrapper return ensure_table_schema_columns(columns) + + +def simulate_func_call(f: Union[Any, AnyFun], args_to_skip: int, 
*args: Any, **kwargs: Any) -> inspect.Signature: + """Simulates a call to a resource or transformer function before it will be wrapped for later execution in the pipe""" + if not callable(f): + # just provoke a call to raise default exception + f() + assert callable(f) + + sig = inspect.signature(f) + # simulate the call to the underlying callable + if args or kwargs: + no_item_sig = sig.replace(parameters=list(sig.parameters.values())[args_to_skip:]) + try: + no_item_sig.bind(*args, **kwargs) + except TypeError as v_ex: + raise TypeError(f"{get_callable_name(f)}(): " + str(v_ex)) + return sig + + +def wrap_compat_transformer(name: str, f: AnyFun, sig: inspect.Signature, *args: Any, **kwargs: Any) -> AnyFun: + """Creates a compatible wrapper over transformer function. A pure transformer function expects data item in first argument and one keyword argument called `meta`""" + if len(sig.parameters) == 2 and "meta" in sig.parameters: + return f + + def _tx_partial(item: TDataItems, meta: Any = None) -> Any: + # print(f"_ITEM:{item}{meta},{args}{kwargs}") + # also provide optional meta so pipe does not need to update arguments + if "meta" in kwargs: + kwargs["meta"] = meta + return f(item, *args, **kwargs) + + # this partial wraps transformer and sets a signature that is compatible with pipe transform calls + return makefun.wraps(f, new_sig=inspect.signature(_tx_partial))(_tx_partial) # type: ignore + + +def wrap_resource_gen(name: str, f: AnyFun, sig: inspect.Signature, *args: Any, **kwargs: Any) -> AnyFun: + """Wraps a generator or generator function so it is evaluated on extraction""" + if inspect.isgeneratorfunction(inspect.unwrap(f)) or inspect.isgenerator(f): + # if no arguments then no wrap + # if len(sig.parameters) == 0: + # return f + + # always wrap generators and generator functions. evaluate only at runtime! 
+ + def _partial() -> Any: + # print(f"_PARTIAL: {args} {kwargs} vs {args_}{kwargs_}") + return f(*args, **kwargs) + + # this partial preserves the original signature and just defers the call to pipe + return makefun.wraps(f, new_sig=inspect.signature(_partial))(_partial) # type: ignore + else: + raise InvalidResourceDataTypeFunctionNotAGenerator(name, f, type(f)) diff --git a/dlt/sources/__init__.py b/dlt/sources/__init__.py index 2a79dca8bd..835ce80519 100644 --- a/dlt/sources/__init__.py +++ b/dlt/sources/__init__.py @@ -1,2 +1,7 @@ """Module with built in sources and source building blocks""" -from dlt.extract.incremental import Incremental as incremental \ No newline at end of file +from dlt.extract.incremental import Incremental as incremental +from dlt.common.storages.filesystem import FileItem +from dlt.extract.source import DltSource, DltResource +from dlt.common.typing import TDataItem, TDataItems +from . import credentials +from . import config \ No newline at end of file diff --git a/dlt/sources/config.py b/dlt/sources/config.py new file mode 100644 index 0000000000..d58c210ab6 --- /dev/null +++ b/dlt/sources/config.py @@ -0,0 +1,2 @@ +from dlt.common.configuration.specs import configspec +from dlt.common.configuration.inject import with_config \ No newline at end of file diff --git a/dlt/sources/credentials.py b/dlt/sources/credentials.py index a4cc38da88..70e88beb49 100644 --- a/dlt/sources/credentials.py +++ b/dlt/sources/credentials.py @@ -1,4 +1,5 @@ from dlt.common.configuration.specs import GcpServiceAccountCredentials, GcpOAuthCredentials, GcpCredentials from dlt.common.configuration.specs import ConnectionStringCredentials from dlt.common.configuration.specs import OAuth2Credentials -from dlt.common.configuration.specs import CredentialsConfiguration, configspec \ No newline at end of file +from dlt.common.configuration.specs import CredentialsConfiguration, configspec +from dlt.common.storages.configuration import FileSystemCredentials, 
FilesystemConfiguration \ No newline at end of file diff --git a/tests/extract/test_decorators.py b/tests/extract/test_decorators.py index 9c065b2dc5..5b7e824b4e 100644 --- a/tests/extract/test_decorators.py +++ b/tests/extract/test_decorators.py @@ -19,6 +19,7 @@ from dlt.common.schema.typing import TTableSchemaColumns from dlt.cli.source_detection import detect_source_configs +from dlt.common.typing import TDataItem from dlt.extract.exceptions import DataItemRequiredForDynamicTableHints, ExplicitSourceNameInvalid, InconsistentTableTemplate, InvalidResourceDataTypeFunctionNotAGenerator, InvalidResourceDataTypeIsNone, InvalidResourceDataTypeMultiplePipes, ParametrizedResourceUnbound, PipeGenInvalid, PipeNotBoundToData, ResourceFunctionExpected, ResourceInnerCallableConfigWrapDisallowed, SourceDataIsNone, SourceIsAClassTypeError, SourceNotAFunction, SourceSchemaNotAvailable from dlt.extract.source import DltResource, DltSource from dlt.common.schema.exceptions import InvalidSchemaName @@ -607,6 +608,10 @@ def nice_signature(init: int): assert nice_signature.__doc__ == """Has nice signature""" assert list(nice_signature(7)) == [7, 8, 9] + assert nice_signature(8)._args_bound is True + with pytest.raises(TypeError): + # bound! 
+ nice_signature(7)() # can't work in a source @@ -629,11 +634,65 @@ def many_instances(): with pytest.raises(ConfigFieldMissingException): list(standalone_signature(1)) + # use wrong signature + with pytest.raises(TypeError): + nice_signature(unk_kw=1, second_unk_kw="A") # type: ignore + # make sure that config sections work os.environ["SOURCES__TEST_DECORATORS__STANDALONE_SIGNATURE__SECRET_END"] = "5" assert list(standalone_signature(1)) == [1, 2, 3, 4] +@dlt.transformer(standalone=True) +def standalone_transformer(item: TDataItem, init: int, secret_end: int = dlt.secrets.value): + """Has fine transformer docstring""" + yield from range(item + init, secret_end) + + +@dlt.transformer(standalone=True) +def standalone_transformer_returns(item: TDataItem, init: int = dlt.config.value): + """Has fine transformer docstring""" + return "A" * item * init + + +def test_standalone_transformer() -> None: + assert not isinstance(standalone_transformer, DltResource) + assert callable(standalone_transformer) + assert standalone_transformer.__doc__ == """Has fine transformer docstring""" + + bound_tx = standalone_transformer(5, 10) + # this is not really true + assert bound_tx._args_bound is True + with pytest.raises(TypeError): + bound_tx(1) + assert isinstance(bound_tx, DltResource) + # the resource sets the start of the range of transformer + transformer init + assert list(standalone_signature(1, 3) | bound_tx) == [6, 7, 8, 9, 7, 8, 9] + + # wrong params to transformer + with pytest.raises(TypeError): + standalone_transformer(unk_kw="ABC") # type: ignore + + # test transformer that returns + bound_tx = standalone_transformer_returns(2) + assert list(standalone_signature(1, 3) | bound_tx) == ["AA", "AAAA"] + + # test configuration + os.environ["SOURCES__TEST_DECORATORS__STANDALONE_SIGNATURE__SECRET_END"] = "5" + os.environ["SOURCES__TEST_DECORATORS__STANDALONE_TRANSFORMER_RETURNS__INIT"] = "2" + assert list(standalone_signature(1) | standalone_transformer_returns()) == ["AA", 
"AAAA", "AAAAAA", "AAAAAAAA"] + + +def test_resource_rename_credentials_separation(): + os.environ["SOURCES__TEST_DECORATORS__STANDALONE_SIGNATURE__SECRET_END"] = "5" + assert list(standalone_signature(1)) == [1, 2, 3, 4] + + # config section is not impacted by the rename + # NOTE: probably we should keep it like that + os.environ["SOURCES__TEST_DECORATORS__RENAMED_SIG__SECRET_END"] = "6" + assert list(standalone_signature(1).with_name("renamed_sig")) == [1, 2, 3, 4] + + def test_class_source() -> None: class _Source: diff --git a/tests/extract/test_sources.py b/tests/extract/test_sources.py index 7739abeb0f..d8223f2ee8 100644 --- a/tests/extract/test_sources.py +++ b/tests/extract/test_sources.py @@ -660,26 +660,26 @@ def test_illegal_double_bind() -> None: def _r1(): yield ["a", "b", "c"] - assert _r1._bound is False - assert _r1()._bound is True + assert _r1._args_bound is False + assert _r1()._args_bound is True with pytest.raises(TypeError) as py_ex: _r1()() - assert "Bound DltResource" in str(py_ex.value) + assert "Parametrized resource" in str(py_ex.value) with pytest.raises(TypeError) as py_ex: _r1.bind().bind() - assert "Bound DltResource" in str(py_ex.value) + assert "Parametrized resource" in str(py_ex.value) bound_r = dlt.resource([1, 2, 3], name="rx") - assert bound_r._bound is True + assert bound_r._args_bound is True with pytest.raises(TypeError): _r1() def _gen(): yield from [1, 2, 3] - assert dlt.resource(_gen())._bound is True + assert dlt.resource(_gen())._args_bound is True @dlt.resource diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py index 53516d51db..c3fd20b04c 100644 --- a/tests/pipeline/test_pipeline.py +++ b/tests/pipeline/test_pipeline.py @@ -1090,3 +1090,41 @@ def users() -> Iterator[User]: expect_extracted_file( storage, pipeline.default_schema_name, "users", json.dumps([{"user_id": 1, "name": "a"}, {"user_id": 2, "name": "b"}]) ) + + +def test_resource_rename_same_table(): + 
@dlt.resource(write_disposition="replace") + def generic(start): + dlt.current.resource_state()["start"] = start + yield [{"id": idx, "text": "A"*idx} for idx in range(start, start + 10)] + + pipeline = dlt.pipeline(destination='duckdb') + load_info = pipeline.run([ + generic(10).with_name("state1"), + generic(20).with_name("state2") + ], table_name="single_table") + assert_load_info(load_info) + # both resources loaded + assert pipeline.last_trace.last_normalize_info.row_counts["single_table"] == 20 + # only this table and state + assert len(pipeline.last_trace.last_normalize_info.row_counts) == 2 + + # check state + # state1 should have 10 + assert generic(0).with_name("state1").state["start"] == 10 + # state2 is 10 + assert generic(0).with_name("state2").state["start"] == 20 + + # NOTE: only one resource will be set in table + assert pipeline.default_schema.get_table("single_table")["resource"] == "state2" + + # now load only state1 + load_info = pipeline.run([ + generic(5).with_name("state1"), + ], table_name="single_table") + assert_load_info(load_info) + # both resources loaded + assert pipeline.last_trace.last_normalize_info.row_counts["single_table"] == 10 + assert generic(0).with_name("state1").state["start"] == 5 + # resource got swapped to the most recent one + assert pipeline.default_schema.get_table("single_table")["resource"] == "state1" From 254d80982627299deb8e6138cf8f0ef846d047d1 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sun, 8 Oct 2023 22:30:27 +0200 Subject: [PATCH 10/15] reorganizes dlt.sources.filesystem module --- dlt/common/configuration/resolve.py | 3 - dlt/common/storages/__init__.py | 2 +- .../{filesystem.py => fsspec_filesystem.py} | 86 ++++++++++++++++++- dlt/common/storages/transactional_file.py | 2 +- dlt/destinations/filesystem/filesystem.py | 6 +- dlt/extract/source.py | 1 - dlt/sources/__init__.py | 4 +- dlt/sources/filesystem.py | 1 + docs/website/docs/general-usage/resource.md | 5 -- 
.../storages/test_transactional_file.py | 4 +- .../load/filesystem/test_filesystem_common.py | 6 +- 11 files changed, 95 insertions(+), 25 deletions(-) rename dlt/common/storages/{filesystem.py => fsspec_filesystem.py} (50%) create mode 100644 dlt/sources/filesystem.py diff --git a/dlt/common/configuration/resolve.py b/dlt/common/configuration/resolve.py index 68421d7d4b..8e1af2831e 100644 --- a/dlt/common/configuration/resolve.py +++ b/dlt/common/configuration/resolve.py @@ -47,7 +47,6 @@ def initialize_credentials(hint: Any, initial_value: Any) -> CredentialsConfigur first_credentials: CredentialsConfiguration = None for idx, spec in enumerate(specs_in_union): try: - # print(spec) credentials = spec(initial_value) if credentials.is_resolved(): return credentials @@ -87,7 +86,6 @@ def inject_section(section_context: ConfigSectionContext, merge_existing: bool = def _maybe_parse_native_value(config: TConfiguration, explicit_value: Any, embedded_sections: Tuple[str, ...]) -> Any: # use initial value to resolve the whole configuration. 
if explicit value is a mapping it will be applied field by field later if explicit_value and (not isinstance(explicit_value, C_Mapping) or isinstance(explicit_value, BaseConfiguration)): - # print(f"TRYING TO PARSE NATIVE from {explicit_value}") try: config.parse_native_representation(explicit_value) except ValueError as v_err: @@ -167,7 +165,6 @@ def _resolve_config_fields( specs_in_union: List[Type[BaseConfiguration]] = [] current_value = None if is_union(hint): - # print(f"HINT UNION?: {key}:{hint}") # if union contains a type of explicit value which is not a valid hint, return it as current value if explicit_value and not is_valid_hint(type(explicit_value)) and get_all_types_of_class_in_union(hint, type(explicit_value)): current_value, traces = explicit_value, [] diff --git a/dlt/common/storages/__init__.py b/dlt/common/storages/__init__.py index f318b09ee0..05a4f36119 100644 --- a/dlt/common/storages/__init__.py +++ b/dlt/common/storages/__init__.py @@ -6,4 +6,4 @@ from .load_storage import LoadStorage # noqa: F401 from .data_item_storage import DataItemStorage # noqa: F401 from .configuration import LoadStorageConfiguration, NormalizeStorageConfiguration, SchemaStorageConfiguration, TSchemaFileFormat, FilesystemConfiguration # noqa: F401 -from .filesystem import filesystem_from_config, filesystem # noqa: F401 \ No newline at end of file +from .fsspec_filesystem import fsspec_from_config, fsspec_filesystem # noqa: F401 \ No newline at end of file diff --git a/dlt/common/storages/filesystem.py b/dlt/common/storages/fsspec_filesystem.py similarity index 50% rename from dlt/common/storages/filesystem.py rename to dlt/common/storages/fsspec_filesystem.py index 55711fa4fb..7032bcb122 100644 --- a/dlt/common/storages/filesystem.py +++ b/dlt/common/storages/fsspec_filesystem.py @@ -1,4 +1,6 @@ -from typing import cast, Tuple, TypedDict, Optional, Union +import io +from io import BytesIO, IOBase +from typing import cast, Tuple, TypedDict, Optional, Union, Any from 
fsspec.core import url_to_fs from fsspec import AbstractFileSystem @@ -39,7 +41,7 @@ class FileItem(TypedDict): MTIME_DISPATCH["abfs"] = MTIME_DISPATCH["az"] -def filesystem(protocol: str, credentials: FileSystemCredentials = None) -> Tuple[AbstractFileSystem, str]: +def fsspec_filesystem(protocol: str, credentials: FileSystemCredentials = None) -> Tuple[AbstractFileSystem, str]: """Instantiates an authenticated fsspec `FileSystem` for a given `protocol` and credentials. Please supply credentials instance corresponding to the protocol. The `protocol` is just the code name of the filesystem ie: @@ -49,11 +51,11 @@ def filesystem(protocol: str, credentials: FileSystemCredentials = None) -> Tupl also see filesystem_from_config """ - return filesystem_from_config(FilesystemConfiguration(protocol, credentials)) + return fsspec_from_config(FilesystemConfiguration(protocol, credentials)) -def filesystem_from_config(config: FilesystemConfiguration) -> Tuple[AbstractFileSystem, str]: +def fsspec_from_config(config: FilesystemConfiguration) -> Tuple[AbstractFileSystem, str]: """Instantiates an authenticated fsspec `FileSystem` from `config` argument. Authenticates following filesystems: @@ -84,3 +86,79 @@ def filesystem_from_config(config: FilesystemConfiguration) -> Tuple[AbstractFil return url_to_fs(config.bucket_url, use_listings_cache=False, **fs_kwargs) # type: ignore[no-any-return] except ModuleNotFoundError as e: raise MissingDependencyException("filesystem", [f"{version.DLT_PKG_NAME}[{proto}]"]) from e + + +class FileItemDict(DictStrAny): + """A FileItem dictionary with additional methods to get fsspec filesystem, open and read files. + """ + + def __init__( + self, mapping: FileItem, credentials: Optional[Union[FileSystemCredentials, AbstractFileSystem]] = None + ): + """Create a dictionary with the filesystem client. + + Args: + mapping (FileItem): The file item TypedDict. 
+ credentials (Optional[FileSystemCredentials], optional): The credentials to the + filesystem. Defaults to None. + """ + self.credentials = credentials + super().__init__(**mapping) + + @property + def fsspec(self) -> AbstractFileSystem: + """The filesystem client based on the given credentials. + + Returns: + AbstractFileSystem: The fsspec client. + """ + if isinstance(self.credentials, AbstractFileSystem): + return self.credentials + else: + return fsspec_filesystem(self["file_url"], self.credentials)[0] + + def open(self, **kwargs: Any) -> IOBase: # noqa: A003 + """Open the file as a fsspec file. + + This method opens the file represented by this dictionary as a file-like object using + the fsspec library. + + Args: + **kwargs (Any): The arguments to pass to the fsspec open function. + + Returns: + IOBase: The fsspec file. + """ + opened_file: IOBase + # if the user has already extracted the content, we use it so there will be no need to + # download the file again. + if self["file_content"] in self: + bytes_io = BytesIO(self["file_content"]) + + text_kwargs = { + k: kwargs.pop(k) + for k in ["encoding", "errors", "newline"] + if k in kwargs + } + return io.TextIOWrapper( + bytes_io, + **text_kwargs, + ) + else: + opened_file = self.fsspec.open(self["file_url"], **kwargs) + return opened_file + + def read_bytes(self) -> bytes: + """Read the file content. + + Returns: + bytes: The file content. + """ + content: bytes + # same as open, if the user has already extracted the content, we use it. 
+ if "file_content" in self and self["file_content"] is not None: + content = self["file_content"] + else: + content = self.fsspec.read_bytes(self["file_url"]) + return content + diff --git a/dlt/common/storages/transactional_file.py b/dlt/common/storages/transactional_file.py index cae50d1951..9a10c812e2 100644 --- a/dlt/common/storages/transactional_file.py +++ b/dlt/common/storages/transactional_file.py @@ -16,7 +16,7 @@ import fsspec from dlt.common.pendulum import pendulum, timedelta -from dlt.common.storages.filesystem import MTIME_DISPATCH +from dlt.common.storages.fsspec_filesystem import MTIME_DISPATCH def lock_id(k: int = 4) -> str: diff --git a/dlt/destinations/filesystem/filesystem.py b/dlt/destinations/filesystem/filesystem.py index 3691c6417b..1d9caf036d 100644 --- a/dlt/destinations/filesystem/filesystem.py +++ b/dlt/destinations/filesystem/filesystem.py @@ -6,7 +6,7 @@ from dlt.common import logger from dlt.common.schema import Schema, TSchemaTables, TTableSchema -from dlt.common.storages import FileStorage, LoadStorage, filesystem_from_config +from dlt.common.storages import FileStorage, LoadStorage, fsspec_from_config from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import NewLoadJob, TLoadJobState, LoadJob, JobClientBase, FollowupJob @@ -33,7 +33,7 @@ def __init__( self.destination_file_name = LoadFilesystemJob.make_destination_filename(config.layout, file_name, schema_name, load_id) super().__init__(file_name) - fs_client, _ = filesystem_from_config(config) + fs_client, _ = fsspec_from_config(config) self.destination_file_name = LoadFilesystemJob.make_destination_filename(config.layout, file_name, schema_name, load_id) item = self.make_remote_path() logger.info("PUT file {item}") @@ -77,7 +77,7 @@ class FilesystemClient(JobClientBase): def __init__(self, schema: Schema, config: FilesystemDestinationClientConfiguration) -> None: super().__init__(schema, config) - self.fs_client, self.fs_path 
= filesystem_from_config(config) + self.fs_client, self.fs_path = fsspec_from_config(config) self.config: FilesystemDestinationClientConfiguration = config # verify files layout. we need {table_name} and only allow {schema_name} before it, otherwise tables # cannot be replaced and we cannot initialize folders consistently diff --git a/dlt/extract/source.py b/dlt/extract/source.py index 5bca2cc773..c271944dc8 100644 --- a/dlt/extract/source.py +++ b/dlt/extract/source.py @@ -372,7 +372,6 @@ def __iter__(self) -> Iterator[TDataItem]: state, _ = pipeline_state(container, {}) state_context = StateInjectableContext(state=state) section_context = self._get_config_section_context() - print(section_context) # managed pipe iterator will set the context on each call to __next__ with inject_section(section_context), Container().injectable_context(state_context): diff --git a/dlt/sources/__init__.py b/dlt/sources/__init__.py index 835ce80519..f080e736d3 100644 --- a/dlt/sources/__init__.py +++ b/dlt/sources/__init__.py @@ -1,7 +1,7 @@ """Module with built in sources and source building blocks""" from dlt.extract.incremental import Incremental as incremental -from dlt.common.storages.filesystem import FileItem from dlt.extract.source import DltSource, DltResource from dlt.common.typing import TDataItem, TDataItems from . import credentials -from . import config \ No newline at end of file +from . import config +from . 
import filesystem \ No newline at end of file diff --git a/dlt/sources/filesystem.py b/dlt/sources/filesystem.py new file mode 100644 index 0000000000..47e8e26890 --- /dev/null +++ b/dlt/sources/filesystem.py @@ -0,0 +1 @@ +from dlt.common.storages.fsspec_filesystem import FileItem, FileItemDict, fsspec_filesystem \ No newline at end of file diff --git a/docs/website/docs/general-usage/resource.md b/docs/website/docs/general-usage/resource.md index a22141046a..26d2b19802 100644 --- a/docs/website/docs/general-usage/resource.md +++ b/docs/website/docs/general-usage/resource.md @@ -366,11 +366,6 @@ pipeline.run(generate_rows(10)) # load a list of resources pipeline.run([generate_rows(10), generate_rows(20)]) ``` - -### Resource state -[Resource state](state.md#read-and-write-pipeline-state-in-a-resource) is kept - - ### Do a full refresh To do a full refresh of an `append` or `merge` resources you temporarily change the write diff --git a/tests/common/storages/test_transactional_file.py b/tests/common/storages/test_transactional_file.py index 5ab91400a5..119b5ee3dd 100644 --- a/tests/common/storages/test_transactional_file.py +++ b/tests/common/storages/test_transactional_file.py @@ -7,7 +7,7 @@ import fsspec import pytest -from dlt.common.storages import filesystem +from dlt.common.storages import fsspec_filesystem from dlt.common.storages.transactional_file import TransactionalFile from tests.utils import skipifwindows @@ -15,7 +15,7 @@ @pytest.fixture(scope="session") def fs() -> fsspec.AbstractFileSystem: - return filesystem("file")[0] + return fsspec_filesystem("file")[0] @pytest.fixture diff --git a/tests/load/filesystem/test_filesystem_common.py b/tests/load/filesystem/test_filesystem_common.py index 5d39e91bc2..2e002e548b 100644 --- a/tests/load/filesystem/test_filesystem_common.py +++ b/tests/load/filesystem/test_filesystem_common.py @@ -6,8 +6,8 @@ from dlt.common import pendulum from dlt.common.configuration.specs import AzureCredentials, 
AzureCredentialsWithoutDefaults -from dlt.common.storages import filesystem_from_config, FilesystemConfiguration -from dlt.common.storages.filesystem import MTIME_DISPATCH +from dlt.common.storages import fsspec_from_config, FilesystemConfiguration +from dlt.common.storages.fsspec_filesystem import MTIME_DISPATCH from dlt.common.utils import uniq_id from tests.utils import preserve_environ, autouse_test_storage @@ -31,7 +31,7 @@ def test_filesystem_instance(all_buckets_env: str) -> None: bucket_url = os.environ['DESTINATION__FILESYSTEM__BUCKET_URL'] config = get_config() assert bucket_url.startswith(config.protocol) - filesystem, url = filesystem_from_config(config) + filesystem, url = fsspec_from_config(config) if config.protocol != "file": assert bucket_url.endswith(url) # do a few file ops From 2c7e4de0d4ac0c9e2e468c6f9720e62a8c0894b5 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 9 Oct 2023 00:27:37 +0200 Subject: [PATCH 11/15] removes too early binding of resource name in incremental --- dlt/common/storages/fsspec_filesystem.py | 25 +++++++++++++----------- dlt/extract/decorators.py | 2 +- dlt/extract/incremental.py | 9 +++++---- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/dlt/common/storages/fsspec_filesystem.py b/dlt/common/storages/fsspec_filesystem.py index 7032bcb122..fbb4eccc39 100644 --- a/dlt/common/storages/fsspec_filesystem.py +++ b/dlt/common/storages/fsspec_filesystem.py @@ -117,7 +117,7 @@ def fsspec(self) -> AbstractFileSystem: else: return fsspec_filesystem(self["file_url"], self.credentials)[0] - def open(self, **kwargs: Any) -> IOBase: # noqa: A003 + def open(self, mode: str = "rb", **kwargs: Any) -> IOBase: # noqa: A003 """Open the file as a fsspec file. 
This method opens the file represented by this dictionary as a file-like object using @@ -132,18 +132,21 @@ def open(self, **kwargs: Any) -> IOBase: # noqa: A003 opened_file: IOBase # if the user has already extracted the content, we use it so there will be no need to # download the file again. - if self["file_content"] in self: + if "file_content" in self: bytes_io = BytesIO(self["file_content"]) - text_kwargs = { - k: kwargs.pop(k) - for k in ["encoding", "errors", "newline"] - if k in kwargs - } - return io.TextIOWrapper( - bytes_io, - **text_kwargs, - ) + if "t" in mode: + text_kwargs = { + k: kwargs.pop(k) + for k in ["encoding", "errors", "newline"] + if k in kwargs + } + return io.TextIOWrapper( + bytes_io, + **text_kwargs, + ) + else: + return bytes_io else: opened_file = self.fsspec.open(self["file_url"], **kwargs) return opened_file diff --git a/dlt/extract/decorators.py b/dlt/extract/decorators.py index d65b21ca85..3cb30726e4 100644 --- a/dlt/extract/decorators.py +++ b/dlt/extract/decorators.py @@ -356,7 +356,7 @@ def decorator(f: Callable[TResourceFunParams, Any]) -> Callable[TResourceFunPara incremental: IncrementalResourceWrapper = None sig = inspect.signature(f) if IncrementalResourceWrapper.should_wrap(sig): - incremental = IncrementalResourceWrapper(resource_name, primary_key) + incremental = IncrementalResourceWrapper(primary_key) incr_f = incremental.wrap(sig, f) if incremental else f resource_sections = (known_sections.SOURCES, source_section, resource_name) diff --git a/dlt/extract/incremental.py b/dlt/extract/incremental.py index 46175d45be..ebc54530cc 100644 --- a/dlt/extract/incremental.py +++ b/dlt/extract/incremental.py @@ -380,8 +380,9 @@ def __str__(self) -> str: class IncrementalResourceWrapper(FilterItem): _incremental: Optional[Incremental[Any]] = None """Keeps the injectable incremental""" + _resource_name: str = None - def __init__(self, resource_name: str, primary_key: Optional[TTableHintTemplate[TColumnNames]] = None) -> None: + 
def __init__(self, primary_key: Optional[TTableHintTemplate[TColumnNames]] = None) -> None: """Creates a wrapper over a resource function that accepts Incremental instance in its argument to perform incremental loading. The wrapper delays instantiation of the Incremental to the moment of actual execution and is currently used by `dlt.resource` decorator. @@ -389,10 +390,8 @@ def __init__(self, resource_name: str, primary_key: Optional[TTableHintTemplate[ Note that wrapper implements `FilterItem` transform interface and functions as a processing step in the before-mentioned resource pipe. Args: - resource_name (str): A name of resource to which the Incremental will be bound at execution primary_key (TTableHintTemplate[TColumnKey], optional): A primary key to be passed to Incremental Instance at execution. Defaults to None. """ - self.resource_name = resource_name self.primary_key = primary_key self.incremental_state: IncrementalColumnState = None self._allow_external_schedulers: bool = None @@ -456,7 +455,8 @@ def _wrap(*args: Any, **kwargs: Any) -> Any: self._incremental = new_incremental self._incremental.resolve() # in case of transformers the bind will be called before this wrapper is set: because transformer is called for a first time late in the pipe - self._incremental.bind(Pipe(self.resource_name)) + if self._resource_name: + self._incremental.bind(Pipe(self._resource_name)) bound_args.arguments[p.name] = self._incremental return func(*bound_args.args, **bound_args.kwargs) @@ -476,6 +476,7 @@ def allow_external_schedulers(self, value: bool) -> None: self._incremental.allow_external_schedulers = value def bind(self, pipe: SupportsPipe) -> "IncrementalResourceWrapper": + self._resource_name = pipe.name if self._incremental: if self._allow_external_schedulers is not None: self._incremental.allow_external_schedulers = self._allow_external_schedulers From 9dacf5c021279b57ec0ba4574fb78b6c23a888ed Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 9 Oct 2023 
15:27:39 +0200 Subject: [PATCH 12/15] improves tests and docstring for state reset on replace --- dlt/common/pipeline.py | 4 ++-- dlt/extract/extract.py | 6 +++--- dlt/extract/pipe.py | 1 - dlt/extract/source.py | 2 +- dlt/extract/typing.py | 6 ++++++ dlt/extract/utils.py | 14 +++++++++++--- dlt/pipeline/helpers.py | 4 ++-- tests/extract/test_incremental.py | 30 +++++++++++++++++++++++++++--- 8 files changed, 52 insertions(+), 15 deletions(-) diff --git a/dlt/common/pipeline.py b/dlt/common/pipeline.py index ebc33e2513..aeb0bdc68a 100644 --- a/dlt/common/pipeline.py +++ b/dlt/common/pipeline.py @@ -424,8 +424,8 @@ def resource_state(resource_name: str = None, source_state_: Optional[DictStrAny return state_.setdefault('resources', {}).setdefault(resource_name, {}) # type: ignore -def _reset_resource_state(resource_name: str, source_state_: Optional[DictStrAny] = None, /) -> None: - """Alpha version of the resource state. Resets the resource state +def reset_resource_state(resource_name: str, source_state_: Optional[DictStrAny] = None, /) -> None: + """Resets the resource state with name `resource_name` by removing it from `source_state` Args: resource_name: The resource key to reset diff --git a/dlt/extract/extract.py b/dlt/extract/extract.py index 5a7e2afa30..f5bc5e4888 100644 --- a/dlt/extract/extract.py +++ b/dlt/extract/extract.py @@ -5,7 +5,7 @@ from dlt.common.configuration.container import Container from dlt.common.configuration.resolve import inject_section from dlt.common.configuration.specs.config_section_context import ConfigSectionContext -from dlt.common.pipeline import _reset_resource_state +from dlt.common.pipeline import reset_resource_state from dlt.common.runtime import signals from dlt.common.runtime.collector import Collector, NULL_COLLECTOR @@ -181,11 +181,11 @@ def extract_with_schema( with Container().injectable_context(SourceSchemaInjectableContext(schema)): # inject the config section with the current source name with 
inject_section(ConfigSectionContext(sections=(known_sections.SOURCES, source.section, source.name), source_state_key=source.name)): - # reset resource states + # reset resource states, the `extracted` list contains all the explicit resources and all their parents for resource in source.resources.extracted.values(): with contextlib.suppress(DataItemRequiredForDynamicTableHints): if resource.write_disposition == "replace": - _reset_resource_state(resource.name) + reset_resource_state(resource.name) extractor = extract(extract_id, source, storage, collector, max_parallel_items=max_parallel_items, workers=workers) # iterate over all items in the pipeline and update the schema if dynamic table hints were present diff --git a/dlt/extract/pipe.py b/dlt/extract/pipe.py index d19a0f9234..fd058b8fa1 100644 --- a/dlt/extract/pipe.py +++ b/dlt/extract/pipe.py @@ -125,7 +125,6 @@ def is_empty(self) -> bool: @property def has_parent(self) -> bool: - """Checks if pipe is connected to parent pipe from which it takes data items. 
Connected pipes are created from transformer resources""" return self.parent is not None @property diff --git a/dlt/extract/source.py b/dlt/extract/source.py index c271944dc8..0b910eb089 100644 --- a/dlt/extract/source.py +++ b/dlt/extract/source.py @@ -527,7 +527,7 @@ def extracted(self) -> Dict[str, DltResource]: # resource for pipe not found: return mock resource mock_template = DltResourceSchema.new_table_template( pipe.name, - write_disposition=resource._table_schema_template.get("write_disposition") + write_disposition=resource.write_disposition ) resource = DltResource(pipe, mock_template, False, section=resource.section) resource.source_name = resource.source_name diff --git a/dlt/extract/typing.py b/dlt/extract/typing.py index 5f32556f92..ad4e23b84f 100644 --- a/dlt/extract/typing.py +++ b/dlt/extract/typing.py @@ -39,6 +39,12 @@ class SupportsPipe(Protocol): """A protocol with the core Pipe properties and operations""" name: str """Pipe name which is inherited by a resource""" + parent: "SupportsPipe" + """A parent of the current pipe""" + @property + def has_parent(self) -> bool: + """Checks if pipe is connected to parent pipe from which it takes data items. Connected pipes are created from transformer resources""" + ... 
ItemTransformFunctionWithMeta = Callable[[TDataItem, str], TAny] diff --git a/dlt/extract/utils.py b/dlt/extract/utils.py index 794c606040..5efe510f33 100644 --- a/dlt/extract/utils.py +++ b/dlt/extract/utils.py @@ -1,15 +1,16 @@ import inspect import makefun -from typing import Union, List, Any, Sequence, cast +from typing import Optional, Union, List, Any, Sequence, cast from collections.abc import Mapping as C_Mapping from dlt.common.exceptions import MissingDependencyException +from dlt.common.pipeline import reset_resource_state from dlt.common.schema.typing import TColumnNames, TAnySchemaColumns, TTableSchemaColumns -from dlt.common.typing import AnyFun, TDataItem, TDataItems +from dlt.common.typing import AnyFun, DictStrAny, TDataItem, TDataItems from dlt.common.utils import get_callable_name from dlt.extract.exceptions import InvalidResourceDataTypeFunctionNotAGenerator -from dlt.extract.typing import TTableHintTemplate, TDataItem, TFunHintTemplate +from dlt.extract.typing import TTableHintTemplate, TDataItem, TFunHintTemplate, SupportsPipe try: from dlt.common.libs import pydantic @@ -62,6 +63,13 @@ def wrapper(item: TDataItem) -> TTableSchemaColumns: return ensure_table_schema_columns(columns) +def reset_pipe_state(pipe: SupportsPipe, source_state_: Optional[DictStrAny] = None) -> None: + """Resets the resource state for a `pipe` and all its parent pipes""" + if pipe.has_parent: + reset_pipe_state(pipe.parent, source_state_) + reset_resource_state(pipe.name, source_state_) + + def simulate_func_call(f: Union[Any, AnyFun], args_to_skip: int, *args: Any, **kwargs: Any) -> inspect.Signature: """Simulates a call to a resource or transformer function before it will be wrapped for later execution in the pipe""" if not callable(f): diff --git a/dlt/pipeline/helpers.py b/dlt/pipeline/helpers.py index 7d1b210fb1..670b2a7887 100644 --- a/dlt/pipeline/helpers.py +++ b/dlt/pipeline/helpers.py @@ -7,7 +7,7 @@ from dlt.common.schema.utils import 
group_tables_by_resource, compile_simple_regexes, compile_simple_regex from dlt.common.schema.typing import TSimpleRegex from dlt.common.typing import REPattern -from dlt.common.pipeline import TSourceState, _reset_resource_state, _sources_state, _delete_source_state_keys, _get_matching_resources +from dlt.common.pipeline import TSourceState, reset_resource_state, _sources_state, _delete_source_state_keys, _get_matching_resources from dlt.common.destination.reference import WithStagingDataset from dlt.destinations.exceptions import DatabaseUndefinedRelation @@ -146,7 +146,7 @@ def _create_modified_state(self) -> Dict[str, Any]: if self.drop_state: for key in _get_matching_resources(self.resource_pattern, source_state): self.info['resource_states'].append(key) - _reset_resource_state(key, source_state) + reset_resource_state(key, source_state) resolved_paths = resolve_paths(self.state_paths_to_drop, source_state) if self.state_paths_to_drop and not resolved_paths: self.info['warnings'].append(f"State paths {self.state_paths_to_drop} did not select any paths in source {source_name}") diff --git a/tests/extract/test_incremental.py b/tests/extract/test_incremental.py index 146ca954bf..cf644aa08d 100644 --- a/tests/extract/test_incremental.py +++ b/tests/extract/test_incremental.py @@ -478,23 +478,47 @@ def child(item): # also transformer will not receive new data info = p.run(child) assert len(info.loads_ids) == 0 - # now it will + # now it will (as the parent resource also got reset) info = p.run(child, write_disposition="replace") - print(info.load_packages[0]) + # print(info.load_packages[0]) assert len(info.loads_ids) == 1 + # pipeline applied hints to the child resource + assert child.write_disposition == "replace" + # create a source where we place only child + s = DltSource("comp", "section", Schema("comp"), [child]) + # but extracted resources will include its parent where it derives write disposition from child + extracted = s.resources.extracted + assert 
extracted[child.name].write_disposition == "replace" + assert extracted[child._pipe.parent.name].write_disposition == "replace" + + # create a source where we place parent explicitly s = DltSource("comp", "section", Schema("comp"), [parent_r, child]) + extracted = s.resources.extracted + assert extracted[child.name].write_disposition == "replace" + # now parent exists separately and has its own write disposition + assert extracted[child._pipe.parent.name].write_disposition == "append" p = dlt.pipeline(pipeline_name=uniq_id(), destination="duckdb") info = p.run(s) + # print(s.state) assert len(info.loads_ids) == 1 info = p.run(s) - # state was reset + # print(s.state) + # state was reset (child is replace but parent is append! so it will not generate any more items due to incremental + # so child will reset itself on replace and never set the state...) assert 'child' not in s.state['resources'] # there will be a load package to reset the state but also a load package to update the child table assert len(info.load_packages[0].jobs['completed_jobs']) == 2 assert {job.job_file_info.table_name for job in info.load_packages[0].jobs['completed_jobs'] } == {"_dlt_pipeline_state", "child"} + # now we add child that has parent_r as parent but we add another instance of standalone_some_data explicitly + # so we have a resource with the same name as child parent but the pipe instance is different + s = DltSource("comp", "section", Schema("comp"), [standalone_some_data(now), child]) + assert extracted[child.name].write_disposition == "replace" + # now parent exists separately and has its own write disposition - because we search by name to identify matching resource + assert extracted[child._pipe.parent.name].write_disposition == "append" + def test_incremental_as_transform() -> None: From 6f28c980de9d7c6b0e61926bc1f9d87abbed4451 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 9 Oct 2023 22:20:03 +0200 Subject: [PATCH 13/15] adds glob filesystem files + tests --- 
dlt/common/storages/fsspec_filesystem.py | 65 +- dlt/sources/filesystem.py | 2 +- pyproject.toml | 9 +- .../storages/samples/csv/freshman_kgs.csv | 68 ++ .../storages/samples/csv/freshman_lbs.csv | 69 ++ .../storages/samples/csv/mlb_players.csv | 1035 +++++++++++++++++ .../storages/samples/csv/mlb_teams_2012.csv | 32 + .../storages/samples/jsonl/mlb_players.jsonl | 1034 ++++++++++++++++ .../samples/met_csv/A801/A881_20230920.csv | 25 + .../samples/met_csv/A803/A803_20230919.csv | 25 + .../samples/met_csv/A803/A803_20230920.csv | 25 + .../samples/parquet/mlb_players.parquet | Bin 0 -> 21026 bytes tests/common/storages/samples/sample.txt | 1 + .../common/storages/test_local_filesystem.py | 31 + tests/common/storages/utils.py | 57 + .../load/filesystem/test_filesystem_common.py | 21 +- 16 files changed, 2485 insertions(+), 14 deletions(-) create mode 100644 tests/common/storages/samples/csv/freshman_kgs.csv create mode 100644 tests/common/storages/samples/csv/freshman_lbs.csv create mode 100644 tests/common/storages/samples/csv/mlb_players.csv create mode 100644 tests/common/storages/samples/csv/mlb_teams_2012.csv create mode 100644 tests/common/storages/samples/jsonl/mlb_players.jsonl create mode 100644 tests/common/storages/samples/met_csv/A801/A881_20230920.csv create mode 100644 tests/common/storages/samples/met_csv/A803/A803_20230919.csv create mode 100644 tests/common/storages/samples/met_csv/A803/A803_20230920.csv create mode 100644 tests/common/storages/samples/parquet/mlb_players.parquet create mode 100644 tests/common/storages/samples/sample.txt create mode 100644 tests/common/storages/test_local_filesystem.py create mode 100644 tests/common/storages/utils.py diff --git a/dlt/common/storages/fsspec_filesystem.py b/dlt/common/storages/fsspec_filesystem.py index fbb4eccc39..155b111f1e 100644 --- a/dlt/common/storages/fsspec_filesystem.py +++ b/dlt/common/storages/fsspec_filesystem.py @@ -1,6 +1,10 @@ import io -from io import BytesIO, IOBase -from typing 
import cast, Tuple, TypedDict, Optional, Union, Any +import mimetypes +import posixpath +import pathlib +from urllib.parse import urlparse +from io import BytesIO +from typing import cast, Tuple, TypedDict, Optional, Union, Iterator, Any, IO from fsspec.core import url_to_fs from fsspec import AbstractFileSystem @@ -15,14 +19,14 @@ from dlt import version -class FileItem(TypedDict): +class FileItem(TypedDict, total=False): """A DataItem representing a file""" file_url: str file_name: str mime_type: str modification_date: pendulum.DateTime size_in_bytes: int - file_content: Optional[Union[str, bytes]] + file_content: Optional[bytes] # Map of protocol to mtime resolver @@ -117,7 +121,7 @@ def fsspec(self) -> AbstractFileSystem: else: return fsspec_filesystem(self["file_url"], self.credentials)[0] - def open(self, mode: str = "rb", **kwargs: Any) -> IOBase: # noqa: A003 + def open(self, mode: str = "rb", **kwargs: Any) -> IO[Any]: # noqa: A003 """Open the file as a fsspec file. This method opens the file represented by this dictionary as a file-like object using @@ -129,7 +133,7 @@ def open(self, mode: str = "rb", **kwargs: Any) -> IOBase: # noqa: A003 Returns: IOBase: The fsspec file. """ - opened_file: IOBase + opened_file: IO[Any] # if the user has already extracted the content, we use it so there will be no need to # download the file again. 
if "file_content" in self: @@ -148,7 +152,7 @@ def open(self, mode: str = "rb", **kwargs: Any) -> IOBase: # noqa: A003 else: return bytes_io else: - opened_file = self.fsspec.open(self["file_url"], **kwargs) + opened_file = self.fsspec.open(self["file_url"], mode=mode, **kwargs) return opened_file def read_bytes(self) -> bytes: @@ -165,3 +169,50 @@ def read_bytes(self) -> bytes: content = self.fsspec.read_bytes(self["file_url"]) return content + +def guess_mime_type(file_name: str) -> str: + mime_type = mimetypes.guess_type(posixpath.basename(file_name), strict=False)[0] + if not mime_type: + mime_type = "application/" + (posixpath.splitext(file_name)[1][1:] or "octet-stream") + return mime_type + + +def glob_files( + fs_client: AbstractFileSystem, bucket_url: str, file_glob: str = "**/*" +) -> Iterator[FileItem]: + """Get the files from the filesystem client. + + Args: + fs_client (AbstractFileSystem): The filesystem client. + bucket_url (str): The url to the bucket. + file_glob (str): A glob for the filename filter. + + Returns: + Iterable[FileItem]: The list of files. + """ + bucket_url_parsed = urlparse(bucket_url) + if not bucket_url_parsed.scheme: + # this is a file so create a proper file url + bucket_url = pathlib.Path(bucket_url).absolute().as_uri() + bucket_url_parsed = urlparse(bucket_url) + + bucket_path = bucket_url_parsed._replace(scheme='').geturl() + bucket_path = bucket_path[2:] if bucket_path.startswith("//") else bucket_path + filter_url = posixpath.join(bucket_path, file_glob) + + glob_result = fs_client.glob(filter_url, detail=True) + if isinstance(glob_result, list): + raise NotImplementedError("Cannot request details when using fsspec.glob. 
For ADSL (Azure) please use version 2023.9.0 or later") + + for file, md in glob_result.items(): + if md["type"] != "file": + continue + file_name = posixpath.relpath(file, bucket_path) + file_url = bucket_url_parsed.scheme + "://" + file + yield FileItem( + file_name=file_name, + file_url=file_url, + mime_type=guess_mime_type(file_name), + modification_date=MTIME_DISPATCH[bucket_url_parsed.scheme](md), + size_in_bytes=int(md["size"]), + ) diff --git a/dlt/sources/filesystem.py b/dlt/sources/filesystem.py index 47e8e26890..af874788c1 100644 --- a/dlt/sources/filesystem.py +++ b/dlt/sources/filesystem.py @@ -1 +1 @@ -from dlt.common.storages.fsspec_filesystem import FileItem, FileItemDict, fsspec_filesystem \ No newline at end of file +from dlt.common.storages.fsspec_filesystem import FileItem, FileItemDict, fsspec_filesystem, glob_files \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 4c00cfe55d..9bf5247913 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "dlt" -version = "0.3.19a0" -description = "dlt is an open-source python-native scalable data loading library that does not require any devops efforts to run." +version = "0.3.19a1" +description = "dlt is an open-source python-first scalable data loading library that does not require any backend to run." authors = ["dltHub Inc. 
"] maintainers = [ "Marcin Rudolf ", "Adrian Brudaru ", "Ty Dunn "] readme = "README.md" @@ -9,12 +9,13 @@ license = "Apache-2.0" homepage = "https://github.com/dlt-hub" repository = "https://github.com/dlt-hub/dlt" classifiers = [ - "Development Status :: 2 - Beta", + "Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Topic :: Software Development :: Libraries", "Operating System :: MacOS :: MacOS X", - "Operating System :: POSIX :: Linux",] + "Operating System :: POSIX :: Linux", + "Operating System :: Microsoft :: Windows",] keywords = [ "etl" ] include = [ "LICENSE.txt", "README.md"] packages = [ diff --git a/tests/common/storages/samples/csv/freshman_kgs.csv b/tests/common/storages/samples/csv/freshman_kgs.csv new file mode 100644 index 0000000000..0c44799f43 --- /dev/null +++ b/tests/common/storages/samples/csv/freshman_kgs.csv @@ -0,0 +1,68 @@ +Sex,Weight(Sep),Weight(Apr),BMI(Sep),BMI(Apr) +M,72,59,22.02,18.14 +M,97,86,19.70,17.44 +M,74,69,24.09,22.43 +M,93,88,26.97,25.57 +F,68,64,21.51,20.10 +M,59,55,18.69,17.40 +F,64,60,24.24,22.88 +F,56,53,21.23,20.23 +F,70,68,30.26,29.24 +F,58,56,21.88,21.02 +F,50,47,17.63,16.89 +M,71,69,24.57,23.85 +M,67,66,20.68,20.15 +F,56,55,20.97,20.36 +F,70,68,27.30,26.73 +F,61,60,23.30,22.88 +F,53,52,19.48,19.24 +M,92,92,24.74,24.69 +F,57,58,20.69,20.79 +M,67,67,20.49,20.60 +F,58,58,21.09,21.24 +F,49,50,18.37,18.53 +M,68,68,22.40,22.61 +F,69,69,28.17,28.43 +M,87,88,23.60,23.81 +M,81,82,26.52,26.78 +M,60,61,18.89,19.27 +F,52,53,19.31,19.75 +M,70,71,20.96,21.32 +F,63,64,21.78,22.22 +F,56,57,19.78,20.23 +M,68,69,22.40,22.82 +M,68,69,22.76,23.19 +F,54,56,20.15,20.69 +M,80,82,22.14,22.57 +M,64,66,20.27,20.76 +F,57,59,22.15,22.93 +F,63,65,23.87,24.67 +F,54,56,18.61,19.34 +F,56,58,21.73,22.58 +M,54,56,18.93,19.72 +M,73,75,25.88,26.72 +M,77,79,28.59,29.53 +F,63,66,21.89,22.79 +F,51,54,18.31,19.28 +F,59,62,19.64,20.63 +F,65,68,23.02,24.10 +F,53,56,20.63,21.91 
+F,62,65,22.61,23.81 +F,55,58,22.03,23.42 +M,74,77,20.31,21.34 +M,74,78,20.31,21.36 +M,64,68,19.59,20.77 +M,64,68,21.05,22.31 +F,57,61,23.47,25.11 +F,64,68,22.84,24.29 +F,60,64,19.50,20.90 +M,64,68,18.51,19.83 +M,66,71,21.40,22.97 +F,52,57,17.72,19.42 +M,71,77,22.26,23.87 +F,55,60,21.64,23.81 +M,65,71,22.51,24.45 +M,75,82,23.69,25.80 +F,42,49,15.08,17.74 +M,74,82,22.64,25.33 +M,94,105,36.57,40.86 diff --git a/tests/common/storages/samples/csv/freshman_lbs.csv b/tests/common/storages/samples/csv/freshman_lbs.csv new file mode 100644 index 0000000000..31f4bd7383 --- /dev/null +++ b/tests/common/storages/samples/csv/freshman_lbs.csv @@ -0,0 +1,69 @@ +Sex,Weight(lbs,Sep),Weight(lbs,Apr),BMI(Sep),BMI(Apr) +M,159,130,22.02,18.14 +M,214,190,19.70,17.44 +M,163,152,24.09,22.43 +M,205,194,26.97,25.57 +F,150,141,21.51,20.10 +M,130,121,18.69,17.40 +F,141,132,24.24,22.88 +F,123,117,21.23,20.23 +F,154,150,30.26,29.24 +F,128,123,21.88,21.02 +F,110,104,17.63,16.89 +M,156,152,24.57,23.85 +M,148,145,20.68,20.15 +F,123,121,20.97,20.36 +F,154,150,27.30,26.73 +F,134,132,23.30,22.88 +F,117,115,19.48,19.24 +M,203,203,24.74,24.69 +F,126,128,20.69,20.79 +M,148,148,20.49,20.60 +F,128,128,21.09,21.24 +F,108,110,18.37,18.53 +M,150,150,22.40,22.61 +F,152,152,28.17,28.43 +M,192,194,23.60,23.81 +M,179,181,26.52,26.78 +M,132,134,18.89,19.27 +F,115,117,19.31,19.75 +M,154,156,20.96,21.32 +F,139,141,21.78,22.22 +F,123,126,19.78,20.23 +M,150,152,22.40,22.82 +M,150,152,22.76,23.19 +F,119,123,20.15,20.69 +M,176,181,22.14,22.57 +M,141,145,20.27,20.76 +F,126,130,22.15,22.93 +F,139,143,23.87,24.67 +F,119,123,18.61,19.34 +F,123,128,21.73,22.58 +M,119,123,18.93,19.72 +M,161,165,25.88,26.72 +M,170,174,28.59,29.53 +F,139,145,21.89,22.79 +F,112,119,18.31,19.28 +F,130,137,19.64,20.63 +F,143,150,23.02,24.10 +F,117,123,20.63,21.91 +F,137,143,22.61,23.81 +F,121,128,22.03,23.42 +M,163,170,20.31,21.34 +M,163,172,20.31,21.36 +M,141,150,19.59,20.77 +M,141,150,21.05,22.31 +F,126,134,23.47,25.11 +F,141,150,22.84,24.29 
+F,132,141,19.50,20.90 +M,141,150,18.51,19.83 +M,145,156,21.40,22.97 +F,115,126,17.72,19.42 +M,156,170,22.26,23.87 +F,121,132,21.64,23.81 +M,143,156,22.51,24.45 +M,165,181,23.69,25.80 +F,93,108,15.08,17.74 +M,163,181,22.64,25.33 +M,207,231,36.57,40.86 + diff --git a/tests/common/storages/samples/csv/mlb_players.csv b/tests/common/storages/samples/csv/mlb_players.csv new file mode 100644 index 0000000000..fbc616a5c4 --- /dev/null +++ b/tests/common/storages/samples/csv/mlb_players.csv @@ -0,0 +1,1035 @@ +Name,Team,Position,Height(inches),Weight(lbs),Age +Adam Donachie,BAL,Catcher,74,180,22.99 +Paul Bako,BAL,Catcher,74,215,34.69 +Ramon Hernandez,BAL,Catcher,72,210,30.78 +Kevin Millar,BAL,First Baseman,72,210,35.43 +Chris Gomez,BAL,First Baseman,73,188,35.71 +Brian Roberts,BAL,Second Baseman,69,176,29.39 +Miguel Tejada,BAL,Shortstop,69,209,30.77 +Melvin Mora,BAL,Third Baseman,71,200,35.07 +Aubrey Huff,BAL,Third Baseman,76,231,30.19 +Adam Stern,BAL,Outfielder,71,180,27.05 +Jeff Fiorentino,BAL,Outfielder,73,188,23.88 +Freddie Bynum,BAL,Outfielder,73,180,26.96 +Nick Markakis,BAL,Outfielder,74,185,23.29 +Brandon Fahey,BAL,Outfielder,74,160,26.11 +Corey Patterson,BAL,Outfielder,69,180,27.55 +Jay Payton,BAL,Outfielder,70,185,34.27 +Jay Gibbons,BAL,Designated Hitter,72,197,30 +Erik Bedard,BAL,Starting Pitcher,73,189,27.99 +Hayden Penn,BAL,Starting Pitcher,75,185,22.38 +Adam Loewen,BAL,Starting Pitcher,78,219,22.89 +Daniel Cabrera,BAL,Starting Pitcher,79,230,25.76 +Steve Trachsel,BAL,Starting Pitcher,76,205,36.33 +Jaret Wright,BAL,Starting Pitcher,74,230,31.17 +Kris Benson,BAL,Starting Pitcher,76,195,32.31 +Scott Williamson,BAL,Relief Pitcher,72,180,31.03 +John Parrish,BAL,Relief Pitcher,71,192,29.26 +Danys Baez,BAL,Relief Pitcher,75,225,29.47 +Chad Bradford,BAL,Relief Pitcher,77,203,32.46 +Jamie Walker,BAL,Relief Pitcher,74,195,35.67 +Brian Burres,BAL,Relief Pitcher,73,182,25.89 +Kurt Birkins,BAL,Relief Pitcher,74,188,26.55 +James Hoey,BAL,Relief Pitcher,78,200,24.17 +Sendy 
Rleal,BAL,Relief Pitcher,73,180,26.69 +Chris Ray,BAL,Relief Pitcher,75,200,25.13 +Jeremy Guthrie,BAL,Relief Pitcher,73,200,27.9 +A.J. Pierzynski,CWS,Catcher,75,245,30.17 +Toby Hall,CWS,Catcher,75,240,31.36 +Paul Konerko,CWS,First Baseman,74,215,30.99 +Tadahito Iguchi,CWS,Second Baseman,69,185,32.24 +Juan Uribe,CWS,Shortstop,71,175,27.61 +Alex Cintron,CWS,Shortstop,74,199,28.2 +Joe Crede,CWS,Third Baseman,73,200,28.85 +Josh Fields,CWS,Third Baseman,73,215,24.21 +Ryan Sweeney,CWS,Outfielder,76,200,22.02 +Brian N. Anderson,CWS,Outfielder,74,205,24.97 +Luis Terrero,CWS,Outfielder,74,206,26.78 +Pablo Ozuna,CWS,Outfielder,70,186,32.51 +Scott Podsednik,CWS,Outfielder,72,188,30.95 +Jermaine Dye,CWS,Outfielder,77,220,33.09 +Darin Erstad,CWS,Outfielder,74,210,32.74 +Rob Mackowiak,CWS,Outfielder,70,195,30.69 +Jim Thome,CWS,Designated Hitter,76,244,36.51 +Jerry Owens,CWS,Designated Hitter,75,195,26.03 +Charlie Haeger,CWS,Starting Pitcher,73,200,23.45 +Heath Phillips,CWS,Starting Pitcher,75,200,24.94 +Gavin Floyd,CWS,Starting Pitcher,76,212,24.09 +Jose Contreras,CWS,Starting Pitcher,76,224,35.23 +Jon Garland,CWS,Starting Pitcher,78,210,27.43 +Javier Vazquez,CWS,Starting Pitcher,74,205,30.6 +Mark Buehrle,CWS,Starting Pitcher,74,220,27.94 +Mike MacDougal,CWS,Relief Pitcher,76,195,29.99 +David Aardsma,CWS,Relief Pitcher,77,200,25.17 +Andrew Sisco,CWS,Relief Pitcher,81,260,24.13 +Matt Thornton,CWS,Relief Pitcher,78,228,30.46 +Bobby Jenks,CWS,Relief Pitcher,75,270,25.96 +Boone Logan,CWS,Relief Pitcher,77,200,22.55 +Sean Tracey,CWS,Relief Pitcher,75,210,26.29 +Nick Masset,CWS,Relief Pitcher,76,190,24.79 +Jose Molina,ANA,Catcher,74,220,31.74 +Jeff Mathis,ANA,Catcher,72,180,23.92 +Mike Napoli,ANA,Catcher,72,205,25.33 +Casey Kotchman,ANA,First Baseman,75,210,24.02 +Kendry Morales,ANA,First Baseman,73,220,23.7 +Shea Hillenbrand,ANA,First Baseman,73,211,31.59 +Robb Quinlan,ANA,First Baseman,73,200,29.95 +Howie Kendrick,ANA,First Baseman,70,180,23.64 +Orlando 
Cabrera,ANA,Shortstop,70,190,32.33 +Erick Aybar,ANA,Shortstop,70,170,23.13 +Dallas McPherson,ANA,Third Baseman,76,230,26.6 +Maicer Izturis,ANA,Third Baseman,68,155,26.46 +Reggie Willits,ANA,Outfielder,71,185,25.75 +Tommy Murphy,ANA,Outfielder,72,185,27.51 +Terry Evans,ANA,Outfielder,75,200,25.11 +Gary Matthews Jr.,ANA,Outfielder,75,225,32.51 +Garret Anderson,ANA,Outfielder,75,225,34.67 +Vladimir Guerrero,ANA,Outfielder,75,220,31.06 +Chone Figgins,ANA,Outfielder,68,160,29.1 +Juan Rivera,ANA,Outfielder,74,205,28.66 +John Lackey,ANA,Starting Pitcher,78,235,28.35 +Bartolo Colon,ANA,Starting Pitcher,71,250,33.77 +Kelvim Escobar,ANA,Starting Pitcher,73,210,30.89 +Dustin Moseley,ANA,Starting Pitcher,76,190,37.74 +Ervin Santana,ANA,Starting Pitcher,74,160,24.14 +Joe Saunders,ANA,Starting Pitcher,74,200,25.71 +Jered Weaver,ANA,Starting Pitcher,79,205,24.41 +Chris Resop,ANA,Relief Pitcher,75,222,24.32 +Phil Seibel,ANA,Relief Pitcher,73,195,28.09 +Justin Speier,ANA,Relief Pitcher,76,205,33.31 +Darren Oliver,ANA,Relief Pitcher,74,220,36.4 +Hector Carrasco,ANA,Relief Pitcher,74,220,37.36 +Scot Shields,ANA,Relief Pitcher,73,170,31.61 +Francisco Rodriguez,ANA,Relief Pitcher,72,185,25.14 +Greg Jones,ANA,Relief Pitcher,74,195,30.29 +Doug Mirabelli,BOS,Catcher,73,220,36.37 +Jason Varitek,BOS,Catcher,74,230,34.89 +George Kottaras,BOS,Catcher,72,180,23.79 +Kevin Youkilis,BOS,First Baseman,73,220,27.96 +Dustin Pedroia,BOS,Second Baseman,69,180,23.54 +Alex Cora,BOS,Shortstop,72,180,31.37 +Julio Lugo,BOS,Shortstop,73,170,31.29 +Mike Lowell,BOS,Third Baseman,75,210,33.01 +Wily Mo Pe?a,BOS,Outfielder,75,215,25.1 +J.D. 
Drew,BOS,Outfielder,73,200,31.28 +Manny Ramirez,BOS,Outfielder,72,213,34.75 +Brandon Moss,BOS,Outfielder,72,180,23.46 +David Murphy,BOS,Outfielder,76,192,25.37 +Eric Hinske,BOS,Outfielder,74,235,29.57 +Coco Crisp,BOS,Outfielder,72,185,27.33 +David Ortiz,BOS,Designated Hitter,76,230,31.28 +Curt Schilling,BOS,Starting Pitcher,77,235,40.29 +Tim Wakefield,BOS,Starting Pitcher,74,210,40.58 +Josh Beckett,BOS,Starting Pitcher,77,222,26.79 +Matt Clement,BOS,Starting Pitcher,75,210,32.55 +Jonathan Papelbon,BOS,Starting Pitcher,76,230,26.27 +Kyle Snyder,BOS,Starting Pitcher,80,220,29.47 +Devern Hansack,BOS,Starting Pitcher,74,180,29.07 +Jon Lester,BOS,Starting Pitcher,74,190,23.15 +Kason Gabbard,BOS,Starting Pitcher,75,200,24.9 +Craig Hansen,BOS,Relief Pitcher,78,210,23.29 +Hideki Okajima,BOS,Relief Pitcher,73,194,31.18 +Craig Breslow,BOS,Relief Pitcher,73,180,26.56 +Manny Delcarmen,BOS,Relief Pitcher,74,190,25.03 +Brendan Donnelly,BOS,Relief Pitcher,75,240,35.66 +Javier Lopez,BOS,Relief Pitcher,76,200,29.64 +J.C. 
Romero,BOS,Relief Pitcher,71,198,30.74 +Joel Pineiro,BOS,Relief Pitcher,73,200,28.43 +Julian Tavarez,BOS,Relief Pitcher,74,195,33.77 +Mike Timlin,BOS,Relief Pitcher,76,210,40.97 +Nick DeBarr,BOS,Relief Pitcher,76,220,23.52 +Victor Martinez,CLE,Catcher,74,190,28.19 +Kelly Shoppach,CLE,Catcher,73,210,26.84 +Ryan Garko,CLE,First Baseman,74,225,26.16 +Joe Inglett,CLE,Second Baseman,70,180,28.67 +Josh Barfield,CLE,Second Baseman,72,185,24.2 +Hector Luna,CLE,Second Baseman,73,170,27.08 +Jhonny Peralta,CLE,Shortstop,73,185,24.76 +Andy Marte,CLE,Third Baseman,73,185,23.36 +Ben Francisco,CLE,Outfielder,73,180,25.35 +Shin-Soo Choo,CLE,Outfielder,71,178,24.63 +Franklin Gutierrez,CLE,Outfielder,74,175,24.02 +Grady Sizemore,CLE,Outfielder,74,200,24.58 +Jason Michaels,CLE,Outfielder,72,204,30.82 +Trot Nixon,CLE,Outfielder,74,211,32.89 +David Dellucci,CLE,Outfielder,71,190,33.33 +Casey Blake,CLE,Outfielder,74,210,33.52 +Travis Hafner,CLE,Designated Hitter,75,240,29.74 +Paul Byrd,CLE,Starting Pitcher,73,190,36.24 +Cliff Lee,CLE,Starting Pitcher,75,190,28.5 +Jake Westbrook,CLE,Starting Pitcher,75,185,29.42 +C.C. 
Sabathia,CLE,Starting Pitcher,79,290,26.61 +Jeremy Sowers,CLE,Starting Pitcher,73,175,23.79 +Rafael Perez,CLE,Relief Pitcher,75,185,24.96 +Brian Slocum,CLE,Relief Pitcher,76,200,25.93 +Edward Mujica,CLE,Relief Pitcher,74,220,22.81 +Fernando Cabrera,CLE,Relief Pitcher,76,170,25.29 +Tom Mastny,CLE,Relief Pitcher,78,220,26.07 +Juan Lara,CLE,Relief Pitcher,74,190,26.09 +Fausto Carmona,CLE,Relief Pitcher,76,220,23.23 +Aaron Fultz,CLE,Relief Pitcher,72,205,33.49 +Rafael Betancourt,CLE,Relief Pitcher,74,200,31.84 +Roberto Hernandez,CLE,Relief Pitcher,76,250,42.3 +Joe Borowski,CLE,Relief Pitcher,74,225,35.82 +Matt Miller,CLE,Relief Pitcher,75,215,35.27 +Jason Davis,CLE,Relief Pitcher,78,210,26.81 +Mike Piazza,OAK,Catcher,75,215,38.49 +Jason Kendall,OAK,Catcher,72,195,32.68 +Adam Melhuse,OAK,Catcher,74,200,34.93 +Nick Swisher,OAK,First Baseman,72,194,26.26 +Dan Johnson,OAK,First Baseman,74,220,27.56 +Donald Murphy,OAK,Second Baseman,70,180,23.98 +Mark Ellis,OAK,Second Baseman,71,180,29.73 +Marco Scutaro,OAK,Shortstop,70,170,31.33 +Bobby Crosby,OAK,Shortstop,75,195,27.13 +Mark Kiger,OAK,Shortstop,71,180,26.75 +Antonio Perez,OAK,Third Baseman,71,170,27.09 +Eric Chavez,OAK,Third Baseman,73,206,29.23 +Milton Bradley,OAK,Outfielder,72,205,28.88 +Shannon Stewart,OAK,Outfielder,71,200,33.01 +Bobby Kielty,OAK,Outfielder,73,225,30.57 +Mark Kotsay,OAK,Outfielder,72,201,31.24 +Ryan Goleski,OAK,Outfielder,75,225,24.95 +Jeremy Brown,OAK,Designated Hitter,70,226,27.35 +Jason Windsor,OAK,Starting Pitcher,74,233,24.62 +David Shafer,OAK,Starting Pitcher,74,180,24.98 +Joe Blanton,OAK,Starting Pitcher,75,225,26.22 +Brad Halsey,OAK,Starting Pitcher,73,180,26.04 +Dan Haren,OAK,Starting Pitcher,77,220,26.45 +Rich Harden,OAK,Starting Pitcher,73,180,25.25 +Joe Kennedy,OAK,Starting Pitcher,76,237,27.77 +Esteban Loaiza,OAK,Starting Pitcher,75,215,35.16 +Alan Embree,OAK,Relief Pitcher,74,190,37.1 +Jay Witasick,OAK,Relief Pitcher,76,235,34.51 +Justin Duchscherer,OAK,Relief Pitcher,75,190,29.28 +Kiko 
Calero,OAK,Relief Pitcher,73,180,32.14 +Chad Gaudin,OAK,Relief Pitcher,71,165,23.94 +Lenny DiNardo,OAK,Relief Pitcher,76,195,27.45 +Scott Dunn,OAK,Relief Pitcher,75,200,28.77 +Huston Street,OAK,Relief Pitcher,72,190,23.58 +Ron Flores,OAK,Relief Pitcher,71,190,27.56 +Jay Marshall,OAK,Relief Pitcher,77,185,24.01 +Marcus McBeth,OAK,Relief Pitcher,73,185,26.52 +Jorge Posada,NYY,Catcher,74,205,35.54 +Wil Nieves,NYY,Catcher,71,190,29.43 +Andy Phillips,NYY,First Baseman,72,205,29.9 +Doug Mientkiewicz,NYY,First Baseman,74,206,32.7 +Josh Phelps,NYY,First Baseman,75,220,28.8 +Miguel Cairo,NYY,Second Baseman,73,208,32.82 +Robinson Cano,NYY,Second Baseman,72,170,24.36 +Derek Jeter,NYY,Shortstop,75,195,32.68 +Alex Rodriguez,NYY,Third Baseman,75,210,31.59 +Johnny Damon,NYY,Outfielder,74,190,33.32 +Bobby Abreu,NYY,Outfielder,72,211,32.97 +Hideki Matsui,NYY,Outfielder,74,230,32.72 +Melky Cabrera,NYY,Outfielder,71,170,22.55 +Kevin Thompson,NYY,Outfielder,70,185,27.45 +Jason Giambi,NYY,Designated Hitter,75,230,36.14 +Mike Mussina,NYY,Starting Pitcher,74,185,38.23 +Carl Pavano,NYY,Starting Pitcher,77,241,31.14 +Andy Pettitte,NYY,Starting Pitcher,77,225,34.71 +Darrell Rasner,NYY,Starting Pitcher,75,210,26.13 +Jeff Karstens,NYY,Starting Pitcher,75,175,24.43 +Humberto Sanchez,NYY,Starting Pitcher,78,230,23.76 +Chien-Ming Wang,NYY,Starting Pitcher,75,200,26.92 +Sean Henn,NYY,Relief Pitcher,76,215,25.85 +Scott Proctor,NYY,Relief Pitcher,73,198,30.16 +Brian Bruney,NYY,Relief Pitcher,75,226,25.03 +Chris Britton,NYY,Relief Pitcher,75,278,24.21 +T.J. 
Beam,NYY,Relief Pitcher,79,215,26.51 +Jose Veras,NYY,Relief Pitcher,77,230,26.36 +Kyle Farnsworth,NYY,Relief Pitcher,76,240,30.88 +Luis Vizcaino,NYY,Relief Pitcher,71,184,32.57 +Mike Myers,NYY,Relief Pitcher,75,219,37.68 +Mariano Rivera,NYY,Relief Pitcher,74,170,37.25 +Ivan Rodriguez,DET,Catcher,69,218,35.25 +Vance Wilson,DET,Catcher,71,190,33.95 +Sean Casey,DET,First Baseman,76,225,32.66 +Chris Shelton,DET,First Baseman,72,220,26.68 +Omar Infante,DET,Second Baseman,72,176,25.18 +Placido Polanco,DET,Second Baseman,70,190,31.39 +Neifi Perez,DET,Second Baseman,72,197,33.74 +Carlos Guillen,DET,Shortstop,73,204,31.42 +Ramon Santiago,DET,Shortstop,71,167,27.5 +Tony Giarratano,DET,Shortstop,72,180,24.25 +Brandon Inge,DET,Third Baseman,71,195,29.78 +Craig Monroe,DET,Outfielder,73,220,30 +Magglio Ordo?ez,DET,Outfielder,72,215,33.09 +Curtis Granderson,DET,Outfielder,73,185,25.96 +Brent Clevlen,DET,Outfielder,74,190,23.34 +Marcus Thames,DET,Outfielder,74,205,29.98 +Gary Sheffield,DET,Outfielder,72,205,38.28 +Mike Rabelo,DET,Designated Hitter,73,200,27.12 +Zach Miner,DET,Starting Pitcher,75,200,24.97 +Jeremy Bonderman,DET,Starting Pitcher,74,210,24.34 +Nate Robertson,DET,Starting Pitcher,74,215,29.49 +Justin Verlander,DET,Starting Pitcher,77,200,24.02 +Virgil Vasquez,DET,Starting Pitcher,75,205,24.73 +Kenny Rogers,DET,Starting Pitcher,73,211,42.3 +Mike Maroth,DET,Starting Pitcher,72,190,29.54 +Fernando Rodney,DET,Relief Pitcher,71,208,29.95 +Chad Durbin,DET,Relief Pitcher,74,200,29.24 +Jason Grilli,DET,Relief Pitcher,77,210,30.3 +Jose Mesa,DET,Relief Pitcher,75,232,40.77 +Todd Jones,DET,Relief Pitcher,75,230,38.85 +Joel Zumaya,DET,Relief Pitcher,75,210,22.31 +Jordan Tata,DET,Relief Pitcher,78,220,25.44 +Andrew Miller,DET,Relief Pitcher,78,210,21.78 +Yorman Bazardo,DET,Relief Pitcher,74,202,22.64 +Wilfredo Ledezma,DET,Relief Pitcher,76,212,26.11 +Roman Colon,DET,Relief Pitcher,78,225,27.55 +Edward Campusano,DET,Relief Pitcher,76,170,24.63 +Rene Rivera,SEA,Catcher,70,190,23.58 
+Kenji Johjima,SEA,Catcher,72,200,30.73 +Richie Sexson,SEA,First Baseman,80,237,32.17 +Ben Broussard,SEA,First Baseman,74,220,30.43 +Jose Lopez,SEA,Second Baseman,74,170,23.27 +Jose Vidro,SEA,Second Baseman,71,193,32.51 +Yuniesky Betancourt,SEA,Shortstop,70,190,25.08 +Oswaldo Navarro,SEA,Shortstop,72,150,22.41 +Adrian Beltre,SEA,Third Baseman,71,220,27.9 +Raul Ibanez,SEA,Outfielder,74,200,34.74 +Jose Guillen,SEA,Outfielder,71,190,30.79 +Jeremy Reed,SEA,Outfielder,72,185,25.71 +Willie Bloomquist,SEA,Outfielder,71,185,29.26 +Adam Jones,SEA,Outfielder,74,200,21.58 +Ichiro Suzuki,SEA,Outfielder,69,172,33.36 +Mike Morse,SEA,Outfielder,76,220,24.94 +Felix Hernandez,SEA,Starting Pitcher,75,225,20.9 +Ryan Feierabend,SEA,Starting Pitcher,75,190,21.52 +Sean White,SEA,Starting Pitcher,76,195,25.85 +Horacio Ramirez,SEA,Starting Pitcher,73,219,27.27 +Cha Baek,SEA,Starting Pitcher,76,190,26.75 +Miguel Batista,SEA,Starting Pitcher,73,197,36.03 +Jeff Weaver,SEA,Starting Pitcher,77,200,30.52 +Jarrod Washburn,SEA,Starting Pitcher,73,195,32.55 +George Sherrill,SEA,Relief Pitcher,72,210,29.86 +Julio Mateo,SEA,Relief Pitcher,72,177,29.58 +J.J. Putz,SEA,Relief Pitcher,77,220,30.02 +Chris Reitsma,SEA,Relief Pitcher,77,235,29.16 +Cesar Jimenez,SEA,Relief Pitcher,71,180,22.3 +Eric O'Flaherty,SEA,Relief Pitcher,74,195,22.06 +Jon Huber,SEA,Relief Pitcher,74,195,25.65 +Jake Woods,SEA,Relief Pitcher,73,190,25.49 +Sean Green,SEA,Relief Pitcher,78,230,27.86 +Mark Lowe,SEA,Relief Pitcher,75,190,23.73 +Josh Paul,TB,Catcher,73,200,31.78 +Dioner Navarro,TB,Catcher,70,190,23.06 +Shawn Riggans,TB,Catcher,74,190,26.6 +Ty Wigginton,TB,First Baseman,72,200,29.39 +Brendan Harris,TB,Second Baseman,73,200,26.51 +Jorge Cantu,TB,Second Baseman,73,184,25.08 +Ben Zobrist,TB,Shortstop,75,200,25.76 +B.J. 
Upton,TB,Third Baseman,75,180,22.52 +Carl Crawford,TB,Outfielder,74,219,25.57 +Rocco Baldelli,TB,Outfielder,76,187,25.43 +Greg Norton,TB,Outfielder,73,200,34.65 +Elijah Dukes,TB,Outfielder,74,220,22.68 +Delmon Young,TB,Outfielder,75,205,21.46 +Jonny Gomes,TB,Designated Hitter,73,205,26.27 +Edwin Jackson,TB,Starting Pitcher,75,190,23.47 +Scott Kazmir,TB,Starting Pitcher,72,170,23.1 +Casey Fossum,TB,Starting Pitcher,73,160,29.14 +Jae Seo,TB,Starting Pitcher,73,215,29.77 +J.P. Howell,TB,Starting Pitcher,72,175,23.85 +Tim Corcoran,TB,Starting Pitcher,74,205,28.88 +Jason Hammel,TB,Starting Pitcher,78,200,24.49 +James Shields,TB,Starting Pitcher,76,214,25.19 +Brian Stokes,TB,Starting Pitcher,73,200,27.48 +Juan Salas,TB,Relief Pitcher,74,190,28.31 +Jeff Ridgway,TB,Relief Pitcher,75,180,26.54 +Ruddy Lugo,TB,Relief Pitcher,70,205,26.77 +Jae-Kuk Ryu,TB,Relief Pitcher,75,220,23.75 +Chad Orvella,TB,Relief Pitcher,71,190,26.41 +Dan Miceli,TB,Relief Pitcher,72,215,36.47 +Seth McClung,TB,Relief Pitcher,78,235,26.06 +Jon Switzer,TB,Relief Pitcher,75,191,27.55 +Shawn Camp,TB,Relief Pitcher,73,200,31.28 +Scott Dohmann,TB,Relief Pitcher,73,181,29.04 +Jason LaRue,KC,Catcher,71,200,32.95 +John Buck,KC,Catcher,75,210,26.65 +Ryan Shealy,KC,First Baseman,77,240,27.5 +Ross Gload,KC,First Baseman,72,185,30.9 +Esteban German,KC,Second Baseman,69,165,29.09 +Mark Grudzielanek,KC,Second Baseman,73,190,36.67 +Angel Sanchez,KC,Second Baseman,74,185,23.44 +Angel Berroa,KC,Shortstop,72,175,29.09 +Andres Blanco,KC,Shortstop,70,155,22.89 +Mark Teahen,KC,Third Baseman,75,210,25.48 +Joey Gathright,KC,Outfielder,70,170,25.84 +David DeJesus,KC,Outfielder,72,175,27.2 +Shane Costa,KC,Outfielder,72,220,25.22 +Mitch Maier,KC,Outfielder,74,210,24.67 +Reggie Sanders,KC,Outfielder,73,205,39.25 +Emil Brown,KC,Outfielder,74,200,32.17 +Mike Sweeney,KC,Designated Hitter,75,225,33.61 +John Bale,KC,Starting Pitcher,76,205,32.77 +Luke Hudson,KC,Starting Pitcher,75,195,29.83 +Scott Elarton,KC,Starting 
Pitcher,80,240,31.02 +Odalis Perez,KC,Starting Pitcher,72,150,29.73 +Gil Meche,KC,Starting Pitcher,75,200,28.48 +Neal Musser,KC,Starting Pitcher,73,215,26.51 +Brian Bannister,KC,Starting Pitcher,74,202,26 +Zack Greinke,KC,Starting Pitcher,74,200,23.36 +Jorge De La Rosa,KC,Starting Pitcher,73,190,25.9 +Todd Wellemeyer,KC,Relief Pitcher,75,205,28.5 +Jimmy Gobble,KC,Relief Pitcher,75,190,25.62 +Joel Peralta,KC,Relief Pitcher,71,160,30.94 +Ryan Braun,KC,Relief Pitcher,73,215,26.59 +Joakim Soria,KC,Relief Pitcher,75,185,22.78 +Ken Ray,KC,Relief Pitcher,74,200,32.26 +David Riske,KC,Relief Pitcher,74,190,30.35 +Octavio Dotel,KC,Relief Pitcher,72,210,33.26 +Joe Nelson,KC,Relief Pitcher,74,185,32.35 +Gerald Laird,TEX,Catcher,74,220,27.3 +Miguel Ojeda,TEX,Catcher,74,190,32.08 +Guillermo Quiroz,TEX,Catcher,73,202,25.25 +Chris Stewart,TEX,Catcher,76,205,25.03 +Mark Teixeira,TEX,First Baseman,75,220,26.89 +Ian Kinsler,TEX,Second Baseman,72,175,24.69 +Joaquin Arias,TEX,Shortstop,73,160,22.44 +Michael Young,TEX,Shortstop,73,190,30.36 +Hank Blalock,TEX,Third Baseman,73,200,26.27 +Marlon Byrd,TEX,Outfielder,72,229,29.5 +Brad Wilkerson,TEX,Outfielder,72,206,29.75 +Sammy Sosa,TEX,Outfielder,72,220,38.3 +Kenny Lofton,TEX,Outfielder,72,180,39.75 +Frank Catalanotto,TEX,Outfielder,71,195,32.84 +Nelson Cruz,TEX,Outfielder,75,175,26.66 +Jason Botts,TEX,Designated Hitter,77,250,26.6 +Robinson Tejeda,TEX,Starting Pitcher,75,188,24.94 +John Rheinecker,TEX,Starting Pitcher,74,230,27.76 +Edinson Volquez,TEX,Starting Pitcher,73,190,23.66 +A.J. 
Murray,TEX,Starting Pitcher,75,200,24.96 +Brandon McCarthy,TEX,Starting Pitcher,79,190,23.65 +Vicente Padilla,TEX,Starting Pitcher,74,219,29.42 +Kevin Millwood,TEX,Starting Pitcher,76,235,32.18 +John Koronka,TEX,Starting Pitcher,73,180,26.66 +Frank Francisco,TEX,Relief Pitcher,74,180,27.47 +Francisco Cruceta,TEX,Relief Pitcher,74,180,25.66 +Akinori Otsuka,TEX,Relief Pitcher,72,200,35.13 +Eric Gagne,TEX,Relief Pitcher,74,234,31.15 +Ron Mahay,TEX,Relief Pitcher,74,185,35.67 +Joaquin Benoit,TEX,Relief Pitcher,75,220,29.6 +Rick Bauer,TEX,Relief Pitcher,78,223,30.14 +Josh Rupe,TEX,Relief Pitcher,74,200,24.53 +Wes Littleton,TEX,Relief Pitcher,74,210,24.49 +C.J. Wilson,TEX,Relief Pitcher,74,200,26.28 +Scott Feldman,TEX,Relief Pitcher,77,210,24.06 +Gregg Zaun,TOR,Catcher,70,190,35.88 +Jason Phillips,TOR,Catcher,73,177,30.42 +Lyle Overbay,TOR,First Baseman,74,227,30.09 +Russ Adams,TOR,Second Baseman,73,180,26.5 +Aaron Hill,TOR,Second Baseman,71,195,24.94 +Jason Smith,TOR,Second Baseman,75,199,29.6 +John McDonald,TOR,Shortstop,71,175,32.43 +Royce Clayton,TOR,Shortstop,72,185,37.16 +Troy Glaus,TOR,Third Baseman,77,240,30.57 +John Hattig,TOR,Third Baseman,74,210,27.01 +Reed Johnson,TOR,Outfielder,70,180,30.23 +Alex Rios,TOR,Outfielder,77,194,26.03 +Vernon Wells,TOR,Outfielder,73,225,28.23 +Frank Thomas,TOR,Designated Hitter,77,275,38.76 +Adam Lind,TOR,Designated Hitter,74,195,23.62 +Shaun Marcum,TOR,Starting Pitcher,72,180,25.21 +Casey Janssen,TOR,Starting Pitcher,76,205,25.45 +Gustavo Chacin,TOR,Starting Pitcher,71,193,26.24 +A.J. Burnett,TOR,Starting Pitcher,76,230,30.15 +Roy Halladay,TOR,Starting Pitcher,78,230,29.8 +John Thomson,TOR,Starting Pitcher,75,220,33.41 +Tomo Ohka,TOR,Starting Pitcher,73,200,30.95 +B.J. 
Ryan,TOR,Relief Pitcher,78,249,31.17 +Scott Downs,TOR,Relief Pitcher,74,190,30.95 +Brian Tallet,TOR,Relief Pitcher,79,208,29.44 +Matt Roney,TOR,Relief Pitcher,75,245,27.14 +Tracy Thorpe,TOR,Relief Pitcher,76,250,26.21 +Jean Machi,TOR,Relief Pitcher,72,160,24.08 +Brandon League,TOR,Relief Pitcher,75,192,23.96 +Dustin McGowan,TOR,Relief Pitcher,75,220,24.94 +Jason Frasor,TOR,Relief Pitcher,70,170,29.56 +Francisco Rosario,TOR,Relief Pitcher,72,197,26.42 +Davis Romero,TOR,Relief Pitcher,70,155,23.92 +Jeremy Accardo,TOR,Relief Pitcher,74,190,25.23 +Mike Redmond,MIN,Catcher,71,200,35.82 +Joe Mauer,MIN,Catcher,76,220,23.87 +Chris Heintz,MIN,Catcher,73,210,32.57 +Justin Morneau,MIN,First Baseman,76,228,25.79 +Luis Castillo,MIN,Second Baseman,71,190,31.47 +Alexi Casilla,MIN,Second Baseman,69,160,22.61 +Alejandro Machado,MIN,Second Baseman,72,184,24.85 +Jason Bartlett,MIN,Shortstop,72,180,27.33 +Luis Rodriguez,MIN,Third Baseman,69,180,26.67 +Jeff Cirillo,MIN,Third Baseman,73,200,37.43 +Nick Punto,MIN,Third Baseman,69,176,29.31 +Jason Tyner,MIN,Outfielder,73,160,29.85 +Michael Cuddyer,MIN,Outfielder,74,222,27.93 +Torii Hunter,MIN,Outfielder,74,211,31.62 +Lew Ford,MIN,Outfielder,72,195,30.55 +Jason Kubel,MIN,Outfielder,71,200,24.77 +Josh Rabe,MIN,Designated Hitter,74,210,28.38 +Rondell White,MIN,Designated Hitter,73,225,35.02 +Ramon Ortiz,MIN,Starting Pitcher,72,175,33.77 +Johan Santana,MIN,Starting Pitcher,72,206,27.97 +Carlos Silva,MIN,Starting Pitcher,76,240,27.85 +Matt Garza,MIN,Starting Pitcher,76,185,23.26 +Boof Bonser,MIN,Starting Pitcher,76,260,25.38 +Francisco Liriano,MIN,Starting Pitcher,74,185,23.35 +Scott Baker,MIN,Starting Pitcher,76,221,25.45 +Pat Neshek,MIN,Relief Pitcher,75,205,26.49 +Glen Perkins,MIN,Relief Pitcher,71,200,24 +Julio DePaula,MIN,Relief Pitcher,72,170,24.16 +Juan Rincon,MIN,Relief Pitcher,71,201,28.1 +Jesse Crain,MIN,Relief Pitcher,73,205,25.65 +Matt Guerrier,MIN,Relief Pitcher,75,185,28.58 +Joe Nathan,MIN,Relief Pitcher,76,205,32.27 +Dennys 
Reyes,MIN,Relief Pitcher,75,245,29.86 +Brayan Pe?a,ATL,Catcher,71,220,25.14 +Brian McCann,ATL,Catcher,75,210,23.03 +Craig Wilson,ATL,First Baseman,74,220,30.25 +Chris Woodward,ATL,Second Baseman,72,185,30.67 +Pete Orr,ATL,Second Baseman,73,175,27.73 +Martin Prado,ATL,Second Baseman,73,170,23.34 +Tony Pe?a,ATL,Shortstop,73,180,25.94 +Edgar Renteria,ATL,Shortstop,73,200,31.56 +Chipper Jones,ATL,Third Baseman,76,210,34.85 +Willy Aybar,ATL,Third Baseman,72,175,23.98 +Jeff Francoeur,ATL,Outfielder,76,220,23.14 +Matt Diaz,ATL,Outfielder,73,206,28.99 +Kelly Johnson,ATL,Outfielder,73,180,25.02 +Andruw Jones,ATL,Outfielder,73,210,29.85 +Ryan Langerhans,ATL,Outfielder,75,195,27.03 +Scott Thorman,ATL,Outfielder,75,200,25.15 +T.J. Bohn,ATL,Outfielder,77,200,27.12 +Tim Hudson,ATL,Starting Pitcher,73,164,31.63 +Jonathan Johnson,ATL,Starting Pitcher,72,180,32.62 +John Smoltz,ATL,Starting Pitcher,75,220,39.79 +Mike Hampton,ATL,Starting Pitcher,70,195,34.47 +Kyle Davies,ATL,Starting Pitcher,74,205,23.47 +Chuck James,ATL,Starting Pitcher,72,170,25.31 +Phil Stockman,ATL,Relief Pitcher,80,240,27.1 +Macay McBride,ATL,Relief Pitcher,71,210,24.35 +Joey Devine,ATL,Relief Pitcher,71,195,23.45 +Peter Moylan,ATL,Relief Pitcher,74,200,28.24 +Mike Gonzalez,ATL,Relief Pitcher,74,205,28.77 +Lance Cormier,ATL,Relief Pitcher,73,192,26.53 +Blaine Boyer,ATL,Relief Pitcher,75,190,25.64 +Manny Acosta,ATL,Relief Pitcher,76,170,25.83 +Bob Wickman,ATL,Relief Pitcher,73,240,38.06 +Tanyon Sturtze,ATL,Relief Pitcher,77,200,36.38 +Oscar Villarreal,ATL,Relief Pitcher,72,205,25.27 +Rafael Soriano,ATL,Relief Pitcher,73,175,27.2 +Chad Paronto,ATL,Relief Pitcher,77,250,31.59 +Tyler Yates,ATL,Relief Pitcher,76,220,29.56 +Henry Blanco,CHC,Catcher,71,224,35.5 +Michael Barrett,CHC,Catcher,75,210,30.35 +Geovany Soto,CHC,Catcher,73,195,24.11 +Scott Moore,CHC,First Baseman,74,180,23.29 +Derrek Lee,CHC,First Baseman,77,245,31.48 +Ryan Theriot,CHC,Second Baseman,71,175,27.23 +Ronny Cedeno,CHC,Shortstop,72,180,24.07 
+Aramis Ramirez,CHC,Third Baseman,73,215,28.68 +Cesar Izturis,CHC,Third Baseman,69,175,27.05 +Alfonso Soriano,CHC,Outfielder,73,180,31.15 +Jacque Jones,CHC,Outfielder,70,195,31.85 +Daryle Ward,CHC,Outfielder,74,230,31.68 +Cliff Floyd,CHC,Outfielder,76,230,34.23 +Mark DeRosa,CHC,Outfielder,73,205,32.01 +Matt Murton,CHC,Outfielder,73,215,25.41 +Buck Coats,CHC,Outfielder,75,195,24.73 +Angel Pagan,CHC,Outfielder,73,180,25.66 +Sean Marshall,CHC,Starting Pitcher,79,205,24.5 +Carlos Marmol,CHC,Starting Pitcher,74,180,24.38 +Ryan O'Malley,CHC,Starting Pitcher,73,190,26.89 +Juan Mateo,CHC,Starting Pitcher,74,180,24.2 +Rich Hill,CHC,Starting Pitcher,77,190,26.97 +Angel Guzman,CHC,Starting Pitcher,75,190,25.21 +Wade Miller,CHC,Starting Pitcher,74,220,30.46 +Jason Marquis,CHC,Starting Pitcher,73,210,28.53 +Carlos Zambrano,CHC,Starting Pitcher,77,255,25.75 +Ted Lilly,CHC,Starting Pitcher,73,190,31.15 +Mark Prior,CHC,Starting Pitcher,77,230,26.48 +Neal Cotts,CHC,Relief Pitcher,74,200,26.93 +Will Ohman,CHC,Relief Pitcher,74,205,29.55 +Scott Eyre,CHC,Relief Pitcher,73,210,34.75 +Kerry Wood,CHC,Relief Pitcher,77,225,29.71 +Ryan Dempster,CHC,Relief Pitcher,74,215,29.83 +Bob Howry,CHC,Relief Pitcher,77,220,33.57 +Mike Wuertz,CHC,Relief Pitcher,75,205,28.21 +Roberto Novoa,CHC,Relief Pitcher,77,200,27.54 +Chris Snyder,ARZ,Catcher,75,220,26.05 +Miguel Montero,ARZ,Catcher,71,197,23.64 +Conor Jackson,ARZ,First Baseman,74,225,24.82 +Robby Hammock,ARZ,First Baseman,70,187,29.8 +Tony Clark,ARZ,First Baseman,79,245,34.71 +Orlando Hudson,ARZ,Second Baseman,72,185,29.22 +Stephen Drew,ARZ,Shortstop,72,185,23.96 +Alberto Callaspo,ARZ,Shortstop,70,175,23.87 +Chad Tracy,ARZ,Third Baseman,74,200,26.77 +Chris Young,ARZ,Outfielder,74,180,23.49 +Scott Hairston,ARZ,Outfielder,72,188,26.77 +Carlos Quentin,ARZ,Outfielder,73,225,24.51 +Jeff DaVanon,ARZ,Outfielder,72,200,33.23 +Eric Byrnes,ARZ,Outfielder,74,210,31.04 +Livan Hernandez,ARZ,Starting Pitcher,74,245,32.02 +Doug Davis,ARZ,Starting 
Pitcher,76,213,31.44 +Randy Johnson,ARZ,Starting Pitcher,82,231,43.47 +Juan Cruz,ARZ,Starting Pitcher,74,165,28.38 +Brandon Webb,ARZ,Starting Pitcher,74,228,27.81 +Enrique Gonzalez,ARZ,Starting Pitcher,70,210,24.57 +Dana Eveland,ARZ,Starting Pitcher,73,250,23.34 +Brandon Medders,ARZ,Relief Pitcher,73,191,27.09 +Tony Pe?a,ARZ,Relief Pitcher,74,190,25.14 +Doug Slaten,ARZ,Relief Pitcher,77,200,27.07 +Edgar Gonzalez,ARZ,Relief Pitcher,72,215,24.02 +Jose Valverde,ARZ,Relief Pitcher,76,254,27.6 +Jorge Julio,ARZ,Relief Pitcher,73,232,27.99 +Brandon Lyon,ARZ,Relief Pitcher,73,180,27.56 +Miguel Olivo,FLA,Catcher,72,215,28.63 +Matt Treanor,FLA,Catcher,74,220,30.99 +Mike Jacobs,FLA,First Baseman,74,180,26.33 +Dan Uggla,FLA,Second Baseman,71,200,26.97 +Robert Andino,FLA,Shortstop,72,170,22.85 +Hanley Ramirez,FLA,Shortstop,75,195,23.19 +Miguel Cabrera,FLA,Third Baseman,74,210,23.87 +Aaron Boone,FLA,Third Baseman,74,200,33.98 +Joe Borchard,FLA,Outfielder,77,220,28.26 +Alfredo Amezaga,FLA,Outfielder,70,165,29.12 +Cody Ross,FLA,Outfielder,71,180,26.18 +Josh Willingham,FLA,Outfielder,73,200,28.03 +Jeremy Hermida,FLA,Outfielder,76,200,23.08 +Eric Reed,FLA,Outfielder,71,170,26.24 +Reggi Abercrombie,FLA,Outfielder,75,224,26.63 +Ricky Nolasco,FLA,Starting Pitcher,74,220,24.21 +Anibal Sanchez,FLA,Starting Pitcher,72,180,23.01 +Scott Olsen,FLA,Starting Pitcher,76,198,23.13 +Josh Johnson,FLA,Starting Pitcher,79,240,23.08 +Dontrelle Willis,FLA,Starting Pitcher,76,239,25.13 +Logan Kensing,FLA,Relief Pitcher,73,185,24.66 +Sergio Mitre,FLA,Relief Pitcher,76,210,26.03 +Kevin Gregg,FLA,Relief Pitcher,78,220,28.7 +Travis Bowyer,FLA,Relief Pitcher,75,200,25.57 +Renyel Pinto,FLA,Relief Pitcher,76,195,24.65 +Randy Messenger,FLA,Relief Pitcher,72,220,25.55 +Yusmeiro Petit,FLA,Relief Pitcher,72,230,22.27 +Carlos Martinez,FLA,Relief Pitcher,73,170,24.76 +Taylor Tankersley,FLA,Relief Pitcher,73,220,23.98 +Henry Owens,FLA,Relief Pitcher,75,230,27.85 +Jose Garcia,FLA,Relief Pitcher,71,165,22.14 +Matt 
Lindstrom,FLA,Relief Pitcher,76,205,27.05 +Javier Valentin,CIN,Catcher,70,192,31.45 +Chad Moeller,CIN,Catcher,75,210,32.03 +David Ross,CIN,Catcher,74,205,29.95 +Joey Votto,CIN,First Baseman,75,200,23.47 +Scott Hatteberg,CIN,First Baseman,73,210,37.21 +Brandon Phillips,CIN,Second Baseman,71,185,25.67 +Juan Castro,CIN,Shortstop,71,195,34.69 +Alex Gonzalez,CIN,Shortstop,72,202,30.04 +Mark Bellhorn,CIN,Third Baseman,73,205,32.52 +Edwin Encarnacion,CIN,Third Baseman,73,195,24.15 +Jeff Keppinger,CIN,Third Baseman,72,180,26.86 +Norris Hopper,CIN,Outfielder,69,200,27.94 +Chris Denorfia,CIN,Outfielder,73,185,26.63 +Adam Dunn,CIN,Outfielder,78,240,27.31 +Bubba Crosby,CIN,Outfielder,71,185,30.55 +Jeff Conine,CIN,Outfielder,73,220,40.68 +Ken Griffey Jr.,CIN,Outfielder,75,205,37.27 +Josh Hamilton,CIN,Outfielder,76,205,25.78 +Ryan Freel,CIN,Outfielder,70,180,30.98 +Kyle Lohse,CIN,Starting Pitcher,74,201,28.41 +Bronson Arroyo,CIN,Starting Pitcher,77,190,30.01 +Eric Milton,CIN,Starting Pitcher,75,208,31.57 +Aaron Harang,CIN,Starting Pitcher,79,240,28.81 +Kirk Saarloos,CIN,Starting Pitcher,72,190,27.77 +Elizardo Ramirez,CIN,Starting Pitcher,72,180,24.09 +Todd Coffey,CIN,Relief Pitcher,77,230,26.47 +Brian Shackelford,CIN,Relief Pitcher,73,195,30.5 +Bill Bray,CIN,Relief Pitcher,75,215,23.74 +Bobby Livingston,CIN,Relief Pitcher,75,190,24.49 +Matt Belisle,CIN,Relief Pitcher,75,195,26.73 +Gary Majewski,CIN,Relief Pitcher,73,215,27.01 +Mike Stanton,CIN,Relief Pitcher,73,215,39.75 +Brad Salmon,CIN,Relief Pitcher,76,220,27.16 +Jared Burton,CIN,Relief Pitcher,77,220,25.74 +David Weathers,CIN,Relief Pitcher,75,230,37.43 +Rheal Cormier,CIN,Relief Pitcher,70,195,39.85 +Yorvit Torrealba,COL,Catcher,71,190,28.62 +Chris Iannetta,COL,Catcher,71,195,23.9 +Alvin Colina,COL,Catcher,75,209,25.18 +Todd Helton,COL,First Baseman,74,204,33.53 +Jamey Carroll,COL,Second Baseman,69,170,33.03 +Kaz Matsui,COL,Second Baseman,70,185,31.35 +Troy Tulowitzki,COL,Shortstop,75,205,22.39 +Clint 
Barmes,COL,Shortstop,72,175,27.99 +Garrett Atkins,COL,Third Baseman,75,210,27.22 +Ryan Spilborghs,COL,Outfielder,73,190,27.49 +Cory Sullivan,COL,Outfielder,72,180,27.53 +Jeff Salazar,COL,Outfielder,72,180,26.26 +Willy Taveras,COL,Outfielder,72,160,25.18 +Matt Holliday,COL,Outfielder,76,235,27.12 +Brad Hawpe,COL,Outfielder,75,200,27.69 +Jeff Baker,COL,Outfielder,74,210,25.69 +Javy Lopez,COL,Designated Hitter,75,224,36.32 +Byung-Hyun Kim,COL,Starting Pitcher,69,180,28.11 +Rodrigo Lopez,COL,Starting Pitcher,73,190,31.21 +Brian Lawrence,COL,Starting Pitcher,72,197,30.8 +Josh Fogg,COL,Starting Pitcher,72,203,30.21 +Aaron Cook,COL,Starting Pitcher,75,205,28.06 +Denny Bautista,COL,Starting Pitcher,77,170,26.52 +Ubaldo Jimenez,COL,Starting Pitcher,76,200,23.1 +Jason Hirsh,COL,Starting Pitcher,80,250,25.02 +Jeff Francis,COL,Starting Pitcher,77,200,26.14 +Taylor Buchholz,COL,Starting Pitcher,76,220,25.38 +Ryan Speier,COL,Relief Pitcher,79,200,27.6 +Ramon Ramirez,COL,Relief Pitcher,71,190,25.5 +Manny Corpas,COL,Relief Pitcher,75,170,24.24 +Juan Morillo,COL,Relief Pitcher,73,190,23.32 +Brian Fuentes,COL,Relief Pitcher,76,220,31.56 +LaTroy Hawkins,COL,Relief Pitcher,77,215,34.19 +Tom Martin,COL,Relief Pitcher,73,206,36.78 +Jeremy Affeldt,COL,Relief Pitcher,76,215,27.73 +Paul Lo Duca,NYM,Catcher,70,185,34.88 +Ramon Castro,NYM,Catcher,75,235,31 +Julio Franco,NYM,First Baseman,73,188,48.52 +Carlos Delgado,NYM,First Baseman,75,230,34.68 +Jose Valentin,NYM,Second Baseman,70,195,37.38 +Anderson Hernandez,NYM,Second Baseman,69,168,24.33 +Damion Easley,NYM,Shortstop,71,190,37.3 +Jose Reyes,NYM,Shortstop,72,160,23.72 +David Wright,NYM,Third Baseman,72,200,24.19 +Ben Johnson,NYM,Outfielder,73,200,25.7 +Endy Chavez,NYM,Outfielder,70,189,29.06 +David Newhan,NYM,Outfielder,70,180,33.48 +Carlos Beltran,NYM,Outfielder,73,190,29.85 +Shawn Green,NYM,Outfielder,76,200,34.3 +Moises Alou,NYM,Outfielder,75,220,40.66 +Lastings Milledge,NYM,Outfielder,72,187,21.9 +Alay Soler,NYM,Starting 
Pitcher,73,240,27.39 +Mike Pelfrey,NYM,Starting Pitcher,79,190,23.13 +Pedro Martinez,NYM,Starting Pitcher,71,180,35.35 +Tom Glavine,NYM,Starting Pitcher,72,185,40.93 +Chan Ho Park,NYM,Starting Pitcher,74,210,33.67 +Orlando Hernandez,NYM,Starting Pitcher,74,220,37.39 +Dave Williams,NYM,Starting Pitcher,74,219,27.97 +Oliver Perez,NYM,Starting Pitcher,72,190,25.54 +John Maine,NYM,Starting Pitcher,76,193,25.81 +Marcos Carvajal,NYM,Relief Pitcher,76,175,22.53 +Ambiorix Burgos,NYM,Relief Pitcher,72,180,22.86 +Jason Vargas,NYM,Relief Pitcher,72,215,24.07 +Jon Adkins,NYM,Relief Pitcher,71,210,29.5 +Juan Padilla,NYM,Relief Pitcher,72,200,30.03 +Duaner Sanchez,NYM,Relief Pitcher,72,190,27.38 +Pedro Feliciano,NYM,Relief Pitcher,70,185,30.51 +Aaron Heilman,NYM,Relief Pitcher,77,220,28.3 +Jorge Sosa,NYM,Relief Pitcher,74,170,29.84 +Scott Schoeneweis,NYM,Relief Pitcher,72,195,33.41 +Guillermo Mota,NYM,Relief Pitcher,76,205,33.6 +Billy Wagner,NYM,Relief Pitcher,71,195,35.6 +Philip Humber,NYM,Relief Pitcher,76,210,24.19 +Brad Ausmus,HOU,Catcher,71,190,37.88 +Humberto Quintero,HOU,Catcher,73,190,27.56 +Hector Gimenez,HOU,Catcher,70,180,24.42 +Lance Berkman,HOU,First Baseman,73,220,31.05 +Mike Lamb,HOU,First Baseman,73,190,31.56 +Mark Loretta,HOU,Second Baseman,72,186,35.55 +Craig Biggio,HOU,Second Baseman,71,185,41.21 +Brooks Conrad,HOU,Second Baseman,71,190,27.12 +Chris Burke,HOU,Second Baseman,71,180,26.97 +Eric Bruntlett,HOU,Second Baseman,72,190,28.92 +Adam Everett,HOU,Shortstop,72,170,30.06 +Morgan Ensberg,HOU,Third Baseman,74,210,31.51 +Carlos Lee,HOU,Outfielder,74,240,30.69 +Jason Lane,HOU,Outfielder,74,220,30.19 +Orlando Palmeiro,HOU,Outfielder,71,180,38.11 +Luke Scott,HOU,Outfielder,72,210,28.68 +Charlton Jimerson,HOU,Outfielder,75,210,27.44 +Fernando Nieve,HOU,Starting Pitcher,72,195,24.63 +Wandy Rodriguez,HOU,Starting Pitcher,71,160,28.11 +Brandon Backe,HOU,Starting Pitcher,72,180,28.9 +Matt Albers,HOU,Starting Pitcher,72,205,24.11 +Woody Williams,HOU,Starting 
Pitcher,72,200,40.53 +Roy Oswalt,HOU,Starting Pitcher,72,185,29.5 +Jason Jennings,HOU,Starting Pitcher,74,245,28.62 +Miguel Asencio,HOU,Relief Pitcher,74,190,26.42 +Brad Lidge,HOU,Relief Pitcher,77,210,30.18 +Trever Miller,HOU,Relief Pitcher,75,200,33.75 +David Borkowski,HOU,Relief Pitcher,73,200,30.06 +Dan Wheeler,HOU,Relief Pitcher,75,222,29.22 +Paul Estrada,HOU,Relief Pitcher,73,215,24.47 +Lincoln Holdzkom,HOU,Relief Pitcher,76,240,24.94 +Chris Sampson,HOU,Relief Pitcher,72,170,28.77 +Chad Qualls,HOU,Relief Pitcher,77,220,28.54 +Ezequiel Astacio,HOU,Relief Pitcher,75,156,27.32 +Mike Lieberthal,LA,Catcher,72,190,35.12 +Russell Martin,LA,Catcher,71,202,24.04 +Olmedo Saenz,LA,First Baseman,71,221,36.39 +James Loney,LA,First Baseman,75,200,22.81 +Nomar Garciaparra,LA,First Baseman,72,190,33.6 +Jeff Kent,LA,Second Baseman,73,210,38.98 +Ramon Martinez,LA,Second Baseman,73,190,34.39 +Marlon Anderson,LA,Second Baseman,71,200,33.15 +Rafael Furcal,LA,Shortstop,70,165,29.35 +Wilson Betemit,LA,Third Baseman,75,190,26.59 +Andy LaRoche,LA,Third Baseman,71,185,23.46 +Matt Kemp,LA,Outfielder,76,230,22.43 +Andre Ethier,LA,Outfielder,73,208,24.89 +Delwyn Young,LA,Outfielder,68,209,24.67 +Jason Repko,LA,Outfielder,71,175,26.17 +Juan Pierre,LA,Outfielder,72,180,29.54 +Luis Gonzalez,LA,Outfielder,74,200,39.49 +Jason Schmidt,LA,Starting Pitcher,77,205,34.08 +Randy Wolf,LA,Starting Pitcher,72,200,30.52 +Brad Penny,LA,Starting Pitcher,76,250,28.77 +Derek Lowe,LA,Starting Pitcher,78,210,33.75 +Mark Hendrickson,LA,Starting Pitcher,81,230,32.69 +Chad Billingsley,LA,Starting Pitcher,72,244,22.59 +Takashi Saito,LA,Relief Pitcher,73,202,37.04 +Jonathan Broxton,LA,Relief Pitcher,76,240,22.7 +Hong-Chih Kuo,LA,Relief Pitcher,72,200,25.6 +Eric Stults,LA,Relief Pitcher,72,215,27.23 +Chin-Hui Tsao,LA,Relief Pitcher,74,177,25.74 +Tim Hamulack,LA,Relief Pitcher,76,210,30.29 +Yhency Brazoban,LA,Relief Pitcher,73,170,26.72 +Brett Tomko,LA,Relief Pitcher,76,215,33.9 +Joe Beimel,LA,Relief 
Pitcher,75,217,29.86 +Elmer Dessens,LA,Relief Pitcher,70,198,36.13 +Ryan Budde,PHI,Catcher,71,200,27.54 +Rod Barajas,PHI,Catcher,74,220,31.49 +Carlos Ruiz,PHI,Catcher,72,170,28.1 +Chris Coste,PHI,Catcher,73,200,34.07 +Ryan Howard,PHI,First Baseman,76,230,27.28 +Wes Helms,PHI,First Baseman,76,231,30.8 +Chase Utley,PHI,Second Baseman,73,183,28.2 +Danny Sandoval,PHI,Second Baseman,71,192,27.9 +Jimmy Rollins,PHI,Shortstop,68,167,28.26 +Abraham Nu?ez,PHI,Third Baseman,71,190,30.96 +Michael Bourn,PHI,Outfielder,71,180,24.18 +Chris Roberson,PHI,Outfielder,74,180,27.52 +Jayson Werth,PHI,Outfielder,77,215,27.78 +Shane Victorino,PHI,Outfielder,69,160,26.25 +Aaron Rowand,PHI,Outfielder,72,205,29.5 +Pat Burrell,PHI,Outfielder,76,223,30.39 +Greg Dobbs,PHI,Designated Hitter,73,205,28.66 +Cole Hamels,PHI,Starting Pitcher,75,175,23.18 +Alfredo Simon,PHI,Starting Pitcher,76,170,25.81 +Scott Mathieson,PHI,Starting Pitcher,75,190,23.01 +Freddy Garcia,PHI,Starting Pitcher,76,240,31.72 +Jamie Moyer,PHI,Starting Pitcher,72,175,44.28 +Jon Lieber,PHI,Starting Pitcher,74,230,36.91 +Brett Myers,PHI,Starting Pitcher,76,223,26.54 +Adam Eaton,PHI,Starting Pitcher,74,196,29.27 +Geoff Geary,PHI,Relief Pitcher,72,167,30.51 +Clay Condrey,PHI,Relief Pitcher,75,195,31.28 +Ryan Madson,PHI,Relief Pitcher,78,190,26.51 +Antonio Alfonseca,PHI,Relief Pitcher,77,250,34.87 +Tom Gordon,PHI,Relief Pitcher,70,190,39.28 +Brian Sanches,PHI,Relief Pitcher,72,190,28.56 +Jim Ed Warden,PHI,Relief Pitcher,79,190,27.82 +Anderson Garcia,PHI,Relief Pitcher,74,170,25.94 +Eude Brito,PHI,Relief Pitcher,71,160,28.53 +Fabio Castro,PHI,Relief Pitcher,68,150,22.11 +Matt Smith,PHI,Relief Pitcher,77,225,27.71 +Damian Miller,MLW,Catcher,75,220,37.38 +Johnny Estrada,MLW,Catcher,71,209,30.67 +Mike Rivera,MLW,Catcher,72,210,30.48 +J.D. 
Closser,MLW,Catcher,70,176,27.12 +Prince Fielder,MLW,First Baseman,72,260,22.81 +Rickie Weeks,MLW,Second Baseman,72,195,24.46 +Tony Graffanino,MLW,Second Baseman,73,190,34.73 +Craig Counsell,MLW,Shortstop,72,184,36.53 +J.J. Hardy,MLW,Shortstop,74,180,24.53 +Bill Hall,MLW,Shortstop,72,195,27.17 +Vinny Rottino,MLW,Third Baseman,72,195,26.9 +Corey Koskie,MLW,Third Baseman,75,219,33.67 +Kevin Mench,MLW,Outfielder,72,225,29.14 +Geoff Jenkins,MLW,Outfielder,73,212,32.61 +Brady Clark,MLW,Outfielder,74,202,33.87 +Tony Gwynn Jr.,MLW,Outfielder,72,185,24.41 +Corey Hart,MLW,Outfielder,78,200,24.94 +Gabe Gross,MLW,Outfielder,75,209,27.36 +Laynce Nix,MLW,Outfielder,72,200,26.33 +Drew Anderson,MLW,Outfielder,74,195,25.72 +Claudio Vargas,MLW,Starting Pitcher,75,228,28.7 +Chris Capuano,MLW,Starting Pitcher,75,210,28.53 +Ben Hendrickson,MLW,Starting Pitcher,76,190,26.07 +Dave Bush,MLW,Starting Pitcher,74,212,27.31 +Carlos Villanueva,MLW,Starting Pitcher,74,190,23.26 +Ben Sheets,MLW,Starting Pitcher,73,218,28.62 +Jeff Suppan,MLW,Starting Pitcher,74,220,32.16 +Brian Shouse,MLW,Relief Pitcher,71,190,38.43 +Francisco Cordero,MLW,Relief Pitcher,74,235,31.81 +Derrick Turnbow,MLW,Relief Pitcher,75,210,29.1 +Matt Wise,MLW,Relief Pitcher,76,200,31.28 +Grant Balfour,MLW,Relief Pitcher,74,188,29.17 +Dennis Sarfate,MLW,Relief Pitcher,76,210,25.89 +Jose Capellan,MLW,Relief Pitcher,76,235,26.13 +Greg Aquino,MLW,Relief Pitcher,73,188,29.13 +Josh Bard,SD,Catcher,75,215,28.92 +Rob Bowen,SD,Catcher,75,216,26.01 +Adrian Gonzalez,SD,First Baseman,74,220,24.81 +Marcus Giles,SD,Second Baseman,68,180,28.79 +Todd Walker,SD,Second Baseman,72,185,33.77 +Geoff Blum,SD,Shortstop,75,200,33.85 +Khalil Greene,SD,Shortstop,71,210,27.36 +Paul McAnulty,SD,Outfielder,70,220,26.01 +Terrmel Sledge,SD,Outfielder,72,185,29.95 +Jack Cust,SD,Outfielder,73,231,28.12 +Jose Cruz Jr.,SD,Outfielder,72,210,32.87 +Russell Branyan,SD,Outfielder,75,195,31.2 +Mike Cameron,SD,Outfielder,74,200,34.14 +Brian 
Giles,SD,Outfielder,70,205,36.11 +Kevin Kouzmanoff,SD,Designated Hitter,73,200,25.6 +Mike Thompson,SD,Starting Pitcher,76,200,26.31 +Clay Hensley,SD,Starting Pitcher,71,190,27.5 +Chris Young,SD,Starting Pitcher,82,250,27.77 +Greg Maddux,SD,Starting Pitcher,72,185,40.88 +Jake Peavy,SD,Starting Pitcher,73,180,25.75 +Scott Cassidy,SD,Relief Pitcher,74,170,31.41 +Scott Strickland,SD,Relief Pitcher,71,180,30.84 +Scott Linebrink,SD,Relief Pitcher,75,208,30.57 +Doug Brocail,SD,Relief Pitcher,77,235,39.79 +Trevor Hoffman,SD,Relief Pitcher,72,215,39.38 +Heath Bell,SD,Relief Pitcher,74,244,29.42 +Royce Ring,SD,Relief Pitcher,72,220,26.19 +Cla Meredith,SD,Relief Pitcher,73,185,23.74 +Andrew Brown,SD,Relief Pitcher,78,230,26.03 +Mike Adams,SD,Relief Pitcher,77,190,28.59 +Justin Hampson,SD,Relief Pitcher,73,200,26.77 +Kevin Cameron,SD,Relief Pitcher,73,180,27.21 +Ryan Ketchner,SD,Relief Pitcher,73,190,24.87 +Brian Schneider,WAS,Catcher,73,196,30.26 +Jesus Flores,WAS,Catcher,73,180,22.34 +Larry Broadway,WAS,First Baseman,76,230,26.2 +Nick Johnson,WAS,First Baseman,75,224,28.45 +Bernie Castro,WAS,Second Baseman,70,160,27.63 +Josh Wilson,WAS,Shortstop,73,178,25.93 +Cristian Guzman,WAS,Shortstop,72,205,28.94 +Felipe Lopez,WAS,Shortstop,73,185,26.8 +Ryan Zimmerman,WAS,Third Baseman,75,210,22.42 +Nook Logan,WAS,Outfielder,74,180,27.26 +Ryan Church,WAS,Outfielder,73,190,28.38 +Kory Casto,WAS,Outfielder,73,200,25.23 +Mike Restovich,WAS,Outfielder,76,257,28.16 +Alex Escobar,WAS,Outfielder,73,190,28.48 +Austin Kearns,WAS,Outfielder,75,220,26.78 +Chris Snelling,WAS,Outfielder,70,165,25.24 +Billy Traber,WAS,Starting Pitcher,77,205,27.45 +Tim Redding,WAS,Starting Pitcher,72,200,29.05 +John Patterson,WAS,Starting Pitcher,77,208,29.08 +Shawn Hill,WAS,Starting Pitcher,74,185,25.84 +Joel Hanrahan,WAS,Starting Pitcher,75,215,25.4 +Mike O'Connor,WAS,Starting Pitcher,75,170,26.54 +Emiliano Fruto,WAS,Relief Pitcher,75,235,22.73 +Chris Schroder,WAS,Relief Pitcher,75,210,28.53 +Brett 
Campbell,WAS,Relief Pitcher,72,170,25.37 +Beltran Perez,WAS,Relief Pitcher,74,180,25.35 +Levale Speigner,WAS,Relief Pitcher,71,170,26.43 +Jason Bergmann,WAS,Relief Pitcher,76,190,25.43 +Saul Rivera,WAS,Relief Pitcher,71,150,29.23 +Chris Booker,WAS,Relief Pitcher,75,230,30.22 +Micah Bowie,WAS,Relief Pitcher,76,203,32.3 +Jon Rauch,WAS,Relief Pitcher,83,260,28.42 +Jerome Williams,WAS,Relief Pitcher,75,246,25.24 +Luis Ayala,WAS,Relief Pitcher,74,186,29.13 +Ryan Wagner,WAS,Relief Pitcher,76,210,24.63 +Chad Cordero,WAS,Relief Pitcher,72,198,24.95 +Humberto Cota,PIT,Catcher,72,210,28.06 +Ronny Paulino,PIT,Catcher,75,215,25.86 +Adam LaRoche,PIT,First Baseman,75,180,27.32 +Ryan Doumit,PIT,First Baseman,72,200,25.91 +Brad Eldred,PIT,First Baseman,77,245,26.63 +Jose Castillo,PIT,Second Baseman,73,200,25.95 +Jack Wilson,PIT,Shortstop,72,192,29.17 +Freddy Sanchez,PIT,Third Baseman,70,192,29.19 +Jason Bay,PIT,Outfielder,74,200,28.44 +Jose Bautista,PIT,Outfielder,72,192,26.36 +Xavier Nady,PIT,Outfielder,74,205,28.29 +Jody Gerut,PIT,Outfielder,72,190,29.45 +Nate McLouth,PIT,Outfielder,71,186,25.34 +Chris Duffy,PIT,Outfielder,70,170,26.86 +Rajai Davis,PIT,Outfielder,71,197,26.36 +Shane Youman,PIT,Starting Pitcher,76,219,27.39 +Yoslan Herrera,PIT,Starting Pitcher,74,200,25.84 +Josh Shortslef,PIT,Starting Pitcher,76,220,25.08 +Zach Duke,PIT,Starting Pitcher,74,207,23.87 +Paul Maholm,PIT,Starting Pitcher,74,225,24.68 +Tom Gorzelanny,PIT,Starting Pitcher,74,207,24.64 +Shawn Chacon,PIT,Starting Pitcher,75,212,29.19 +Tony Armas Jr.,PIT,Starting Pitcher,75,225,28.84 +Ian Snell,PIT,Starting Pitcher,71,170,25.33 +Sean Burnett,PIT,Starting Pitcher,71,190,24.45 +John Grabow,PIT,Relief Pitcher,74,210,28.32 +Marty McLeary,PIT,Relief Pitcher,77,230,32.34 +Salomon Torres,PIT,Relief Pitcher,71,210,34.97 +Damaso Marte,PIT,Relief Pitcher,74,200,32.04 +Matt Capps,PIT,Relief Pitcher,75,238,23.49 +Josh Sharpless,PIT,Relief Pitcher,77,234,26.09 +Bryan Bullington,PIT,Relief Pitcher,76,222,26.41 +Jonah 
Bayliss,PIT,Relief Pitcher,74,200,26.55 +Brian Rogers,PIT,Relief Pitcher,76,190,24.62 +Juan Perez,PIT,Relief Pitcher,72,170,28.49 +Bengie Molina,SF,Catcher,71,220,32.61 +Eliezer Alfonzo,SF,Catcher,72,223,28.06 +Lance Niekro,SF,First Baseman,75,210,28.08 +Mark Sweeney,SF,First Baseman,73,215,37.34 +Ray Durham,SF,Second Baseman,68,196,35.25 +Kevin Frandsen,SF,Second Baseman,72,175,24.77 +Omar Vizquel,SF,Shortstop,69,175,39.85 +Rich Aurilia,SF,Third Baseman,73,189,35.49 +Pedro Feliz,SF,Third Baseman,73,205,31.84 +Todd Linden,SF,Outfielder,75,210,26.67 +Dave Roberts,SF,Outfielder,70,180,34.75 +Jason Ellison,SF,Outfielder,70,180,28.91 +Randy Winn,SF,Outfielder,74,197,32.73 +Ryan Klesko,SF,Outfielder,75,220,35.72 +Barry Bonds,SF,Outfielder,74,228,42.6 +Fred Lewis,SF,Outfielder,74,190,26.22 +Kelyn Acosta,SF,Starting Pitcher,73,204,21.85 +Jonathan Sanchez,SF,Starting Pitcher,74,165,24.28 +Matt Cain,SF,Starting Pitcher,75,216,22.41 +Matt Morris,SF,Starting Pitcher,77,220,32.56 +Russ Ortiz,SF,Starting Pitcher,73,208,32.74 +Noah Lowry,SF,Starting Pitcher,74,210,26.39 +Barry Zito,SF,Starting Pitcher,76,215,28.8 +Vinnie Chulk,SF,Relief Pitcher,74,195,28.2 +Kevin Correia,SF,Relief Pitcher,75,200,26.52 +Steve Kline,SF,Relief Pitcher,73,215,34.52 +Armando Benitez,SF,Relief Pitcher,76,229,34.32 +Scott Munter,SF,Relief Pitcher,78,240,26.98 +Jack Taschner,SF,Relief Pitcher,75,207,28.86 +Brian Wilson,SF,Relief Pitcher,73,205,24.96 +Merkin Valdez,SF,Relief Pitcher,77,208,25.3 +Brad Hennessey,SF,Relief Pitcher,74,185,27.06 +Billy Sadler,SF,Relief Pitcher,72,190,25.44 +Pat Misch,SF,Relief Pitcher,74,170,25.53 +Gary Bennett,STL,Catcher,72,208,34.87 +Yadier Molina,STL,Catcher,71,225,24.63 +John Nelson,STL,First Baseman,73,190,27.99 +Albert Pujols,STL,First Baseman,75,225,27.12 +Adam Kennedy,STL,Second Baseman,73,185,31.14 +Aaron Miles,STL,Second Baseman,67,180,30.21 +David Eckstein,STL,Shortstop,67,165,32.11 +Scott Rolen,STL,Third Baseman,76,240,31.91 +Scott Spiezio,STL,Third 
Baseman,74,220,34.44 +Jim Edmonds,STL,Outfielder,73,212,36.68 +So Taguchi,STL,Outfielder,70,163,37.66 +Juan Encarnacion,STL,Outfielder,75,215,30.98 +Skip Schumaker,STL,Outfielder,70,175,27.07 +John Rodriguez,STL,Outfielder,72,205,29.11 +Chris Duncan,STL,Outfielder,77,210,25.82 +Adam Wainwright,STL,Starting Pitcher,79,205,25.5 +Mark Mulder,STL,Starting Pitcher,78,208,29.57 +Anthony Reyes,STL,Starting Pitcher,74,215,25.37 +Ryan Franklin,STL,Starting Pitcher,75,180,33.99 +Kip Wells,STL,Starting Pitcher,75,200,29.86 +Chris Carpenter,STL,Starting Pitcher,78,230,31.84 +Russ Springer,STL,Relief Pitcher,76,211,38.31 +Jason Isringhausen,STL,Relief Pitcher,75,230,34.48 +Ricardo Rincon,STL,Relief Pitcher,69,190,36.88 +Braden Looper,STL,Relief Pitcher,75,220,32.34 +Randy Flores,STL,Relief Pitcher,72,180,31.58 +Josh Hancock,STL,Relief Pitcher,75,205,28.89 +Brad Thompson,STL,Relief Pitcher,73,190,25.08 +Tyler Johnson,STL,Relief Pitcher,74,180,25.73 +Chris Narveson,STL,Relief Pitcher,75,205,25.19 +Randy Keisler,STL,Relief Pitcher,75,190,31.01 +Josh Kinney,STL,Relief Pitcher,73,195,27.92 \ No newline at end of file diff --git a/tests/common/storages/samples/csv/mlb_teams_2012.csv b/tests/common/storages/samples/csv/mlb_teams_2012.csv new file mode 100644 index 0000000000..c10f362e93 --- /dev/null +++ b/tests/common/storages/samples/csv/mlb_teams_2012.csv @@ -0,0 +1,32 @@ +Team,Payroll(millions),Wins +Nationals,81.34,98 +Reds,82.20,97 +Yankees,197.96,95 +Giants,117.62,94 +Braves,83.31,94 +Athletics,55.37,94 +Rangers,120.51,93 +Orioles,81.43,93 +Rays,64.17,90 +Angels,154.49,89 +Tigers,132.30,88 +Cardinals,110.30,88 +Dodgers,95.14,86 +WhiteSox,96.92,85 +Brewers,97.65,83 +Phillies,174.54,81 +Diamondbacks,74.28,81 +Pirates,63.43,79 +Padres,55.24,76 +Mariners,81.97,75 +Mets,93.35,74 +BlueJays,75.48,73 +Royals,60.91,72 +Marlins,118.07,69 +RedSox,173.18,69 +Indians,78.43,68 +Twins,94.08,66 +Rockies,78.06,64 +Cubs,88.19,61 +Astros,60.65,55 + diff --git 
a/tests/common/storages/samples/jsonl/mlb_players.jsonl b/tests/common/storages/samples/jsonl/mlb_players.jsonl new file mode 100644 index 0000000000..7f8450a569 --- /dev/null +++ b/tests/common/storages/samples/jsonl/mlb_players.jsonl @@ -0,0 +1,1034 @@ +{"Name":"Adam Donachie","Team":"BAL","Position":"Catcher","Height(inches)":74,"Weight(lbs)":180,"Age":22} +{"Name":"Paul Bako","Team":"BAL","Position":"Catcher","Height(inches)":74,"Weight(lbs)":215,"Age":34} +{"Name":"Ramon Hernandez","Team":"BAL","Position":"Catcher","Height(inches)":72,"Weight(lbs)":210,"Age":30} +{"Name":"Kevin Millar","Team":"BAL","Position":"First Baseman","Height(inches)":72,"Weight(lbs)":210,"Age":35} +{"Name":"Chris Gomez","Team":"BAL","Position":"First Baseman","Height(inches)":73,"Weight(lbs)":188,"Age":35} +{"Name":"Brian Roberts","Team":"BAL","Position":"Second Baseman","Height(inches)":69,"Weight(lbs)":176,"Age":29} +{"Name":"Miguel Tejada","Team":"BAL","Position":"Shortstop","Height(inches)":69,"Weight(lbs)":209,"Age":30} +{"Name":"Melvin Mora","Team":"BAL","Position":"Third Baseman","Height(inches)":71,"Weight(lbs)":200,"Age":35} +{"Name":"Aubrey Huff","Team":"BAL","Position":"Third Baseman","Height(inches)":76,"Weight(lbs)":231,"Age":30} +{"Name":"Adam Stern","Team":"BAL","Position":"Outfielder","Height(inches)":71,"Weight(lbs)":180,"Age":27} +{"Name":"Jeff Fiorentino","Team":"BAL","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":188,"Age":23} +{"Name":"Freddie Bynum","Team":"BAL","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":180,"Age":26} +{"Name":"Nick Markakis","Team":"BAL","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":185,"Age":23} +{"Name":"Brandon Fahey","Team":"BAL","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":160,"Age":26} +{"Name":"Corey Patterson","Team":"BAL","Position":"Outfielder","Height(inches)":69,"Weight(lbs)":180,"Age":27} +{"Name":"Jay 
Payton","Team":"BAL","Position":"Outfielder","Height(inches)":70,"Weight(lbs)":185,"Age":34} +{"Name":"Jay Gibbons","Team":"BAL","Position":"Designated Hitter","Height(inches)":72,"Weight(lbs)":197,"Age":30} +{"Name":"Erik Bedard","Team":"BAL","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":189,"Age":27} +{"Name":"Hayden Penn","Team":"BAL","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":185,"Age":22} +{"Name":"Adam Loewen","Team":"BAL","Position":"Starting Pitcher","Height(inches)":78,"Weight(lbs)":219,"Age":22} +{"Name":"Daniel Cabrera","Team":"BAL","Position":"Starting Pitcher","Height(inches)":79,"Weight(lbs)":230,"Age":25} +{"Name":"Steve Trachsel","Team":"BAL","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":205,"Age":36} +{"Name":"Jaret Wright","Team":"BAL","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":230,"Age":31} +{"Name":"Kris Benson","Team":"BAL","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":195,"Age":32} +{"Name":"Scott Williamson","Team":"BAL","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":180,"Age":31} +{"Name":"John Parrish","Team":"BAL","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":192,"Age":29} +{"Name":"Danys Baez","Team":"BAL","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":225,"Age":29} +{"Name":"Chad Bradford","Team":"BAL","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":203,"Age":32} +{"Name":"Jamie Walker","Team":"BAL","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":195,"Age":35} +{"Name":"Brian Burres","Team":"BAL","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":182,"Age":25} +{"Name":"Kurt Birkins","Team":"BAL","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":188,"Age":26} +{"Name":"James Hoey","Team":"BAL","Position":"Relief Pitcher","Height(inches)":78,"Weight(lbs)":200,"Age":24} +{"Name":"Sendy Rleal","Team":"BAL","Position":"Relief 
Pitcher","Height(inches)":73,"Weight(lbs)":180,"Age":26} +{"Name":"Chris Ray","Team":"BAL","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":200,"Age":25} +{"Name":"Jeremy Guthrie","Team":"BAL","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":200,"Age":27} +{"Name":"A.J. Pierzynski","Team":"CWS","Position":"Catcher","Height(inches)":75,"Weight(lbs)":245,"Age":30} +{"Name":"Toby Hall","Team":"CWS","Position":"Catcher","Height(inches)":75,"Weight(lbs)":240,"Age":31} +{"Name":"Paul Konerko","Team":"CWS","Position":"First Baseman","Height(inches)":74,"Weight(lbs)":215,"Age":30} +{"Name":"Tadahito Iguchi","Team":"CWS","Position":"Second Baseman","Height(inches)":69,"Weight(lbs)":185,"Age":32} +{"Name":"Juan Uribe","Team":"CWS","Position":"Shortstop","Height(inches)":71,"Weight(lbs)":175,"Age":27} +{"Name":"Alex Cintron","Team":"CWS","Position":"Shortstop","Height(inches)":74,"Weight(lbs)":199,"Age":28} +{"Name":"Joe Crede","Team":"CWS","Position":"Third Baseman","Height(inches)":73,"Weight(lbs)":200,"Age":28} +{"Name":"Josh Fields","Team":"CWS","Position":"Third Baseman","Height(inches)":73,"Weight(lbs)":215,"Age":24} +{"Name":"Ryan Sweeney","Team":"CWS","Position":"Outfielder","Height(inches)":76,"Weight(lbs)":200,"Age":22} +{"Name":"Brian N. 
Anderson","Team":"CWS","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":205,"Age":24} +{"Name":"Luis Terrero","Team":"CWS","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":206,"Age":26} +{"Name":"Pablo Ozuna","Team":"CWS","Position":"Outfielder","Height(inches)":70,"Weight(lbs)":186,"Age":32} +{"Name":"Scott Podsednik","Team":"CWS","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":188,"Age":30} +{"Name":"Jermaine Dye","Team":"CWS","Position":"Outfielder","Height(inches)":77,"Weight(lbs)":220,"Age":33} +{"Name":"Darin Erstad","Team":"CWS","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":210,"Age":32} +{"Name":"Rob Mackowiak","Team":"CWS","Position":"Outfielder","Height(inches)":70,"Weight(lbs)":195,"Age":30} +{"Name":"Jim Thome","Team":"CWS","Position":"Designated Hitter","Height(inches)":76,"Weight(lbs)":244,"Age":36} +{"Name":"Jerry Owens","Team":"CWS","Position":"Designated Hitter","Height(inches)":75,"Weight(lbs)":195,"Age":26} +{"Name":"Charlie Haeger","Team":"CWS","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":200,"Age":23} +{"Name":"Heath Phillips","Team":"CWS","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":200,"Age":24} +{"Name":"Gavin Floyd","Team":"CWS","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":212,"Age":24} +{"Name":"Jose Contreras","Team":"CWS","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":224,"Age":35} +{"Name":"Jon Garland","Team":"CWS","Position":"Starting Pitcher","Height(inches)":78,"Weight(lbs)":210,"Age":27} +{"Name":"Javier Vazquez","Team":"CWS","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":205,"Age":30} +{"Name":"Mark Buehrle","Team":"CWS","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":220,"Age":27} +{"Name":"Mike MacDougal","Team":"CWS","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":195,"Age":29} +{"Name":"David Aardsma","Team":"CWS","Position":"Relief 
Pitcher","Height(inches)":77,"Weight(lbs)":200,"Age":25} +{"Name":"Andrew Sisco","Team":"CWS","Position":"Relief Pitcher","Height(inches)":81,"Weight(lbs)":260,"Age":24} +{"Name":"Matt Thornton","Team":"CWS","Position":"Relief Pitcher","Height(inches)":78,"Weight(lbs)":228,"Age":30} +{"Name":"Bobby Jenks","Team":"CWS","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":270,"Age":25} +{"Name":"Boone Logan","Team":"CWS","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":200,"Age":22} +{"Name":"Sean Tracey","Team":"CWS","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":210,"Age":26} +{"Name":"Nick Masset","Team":"CWS","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":190,"Age":24} +{"Name":"Jose Molina","Team":"ANA","Position":"Catcher","Height(inches)":74,"Weight(lbs)":220,"Age":31} +{"Name":"Jeff Mathis","Team":"ANA","Position":"Catcher","Height(inches)":72,"Weight(lbs)":180,"Age":23} +{"Name":"Mike Napoli","Team":"ANA","Position":"Catcher","Height(inches)":72,"Weight(lbs)":205,"Age":25} +{"Name":"Casey Kotchman","Team":"ANA","Position":"First Baseman","Height(inches)":75,"Weight(lbs)":210,"Age":24} +{"Name":"Kendry Morales","Team":"ANA","Position":"First Baseman","Height(inches)":73,"Weight(lbs)":220,"Age":23} +{"Name":"Shea Hillenbrand","Team":"ANA","Position":"First Baseman","Height(inches)":73,"Weight(lbs)":211,"Age":31} +{"Name":"Robb Quinlan","Team":"ANA","Position":"First Baseman","Height(inches)":73,"Weight(lbs)":200,"Age":29} +{"Name":"Howie Kendrick","Team":"ANA","Position":"First Baseman","Height(inches)":70,"Weight(lbs)":180,"Age":23} +{"Name":"Orlando Cabrera","Team":"ANA","Position":"Shortstop","Height(inches)":70,"Weight(lbs)":190,"Age":32} +{"Name":"Erick Aybar","Team":"ANA","Position":"Shortstop","Height(inches)":70,"Weight(lbs)":170,"Age":23} +{"Name":"Dallas McPherson","Team":"ANA","Position":"Third Baseman","Height(inches)":76,"Weight(lbs)":230,"Age":26} +{"Name":"Maicer 
Izturis","Team":"ANA","Position":"Third Baseman","Height(inches)":68,"Weight(lbs)":155,"Age":26} +{"Name":"Reggie Willits","Team":"ANA","Position":"Outfielder","Height(inches)":71,"Weight(lbs)":185,"Age":25} +{"Name":"Tommy Murphy","Team":"ANA","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":185,"Age":27} +{"Name":"Terry Evans","Team":"ANA","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":200,"Age":25} +{"Name":"Gary Matthews Jr.","Team":"ANA","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":225,"Age":32} +{"Name":"Garret Anderson","Team":"ANA","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":225,"Age":34} +{"Name":"Vladimir Guerrero","Team":"ANA","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":220,"Age":31} +{"Name":"Chone Figgins","Team":"ANA","Position":"Outfielder","Height(inches)":68,"Weight(lbs)":160,"Age":29} +{"Name":"Juan Rivera","Team":"ANA","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":205,"Age":28} +{"Name":"John Lackey","Team":"ANA","Position":"Starting Pitcher","Height(inches)":78,"Weight(lbs)":235,"Age":28} +{"Name":"Bartolo Colon","Team":"ANA","Position":"Starting Pitcher","Height(inches)":71,"Weight(lbs)":250,"Age":33} +{"Name":"Kelvim Escobar","Team":"ANA","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":210,"Age":30} +{"Name":"Dustin Moseley","Team":"ANA","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":190,"Age":37} +{"Name":"Ervin Santana","Team":"ANA","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":160,"Age":24} +{"Name":"Joe Saunders","Team":"ANA","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":200,"Age":25} +{"Name":"Jered Weaver","Team":"ANA","Position":"Starting Pitcher","Height(inches)":79,"Weight(lbs)":205,"Age":24} +{"Name":"Chris Resop","Team":"ANA","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":222,"Age":24} +{"Name":"Phil Seibel","Team":"ANA","Position":"Relief 
Pitcher","Height(inches)":73,"Weight(lbs)":195,"Age":28} +{"Name":"Justin Speier","Team":"ANA","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":205,"Age":33} +{"Name":"Darren Oliver","Team":"ANA","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":220,"Age":36} +{"Name":"Hector Carrasco","Team":"ANA","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":220,"Age":37} +{"Name":"Scot Shields","Team":"ANA","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":170,"Age":31} +{"Name":"Francisco Rodriguez","Team":"ANA","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":185,"Age":25} +{"Name":"Greg Jones","Team":"ANA","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":195,"Age":30} +{"Name":"Doug Mirabelli","Team":"BOS","Position":"Catcher","Height(inches)":73,"Weight(lbs)":220,"Age":36} +{"Name":"Jason Varitek","Team":"BOS","Position":"Catcher","Height(inches)":74,"Weight(lbs)":230,"Age":34} +{"Name":"George Kottaras","Team":"BOS","Position":"Catcher","Height(inches)":72,"Weight(lbs)":180,"Age":23} +{"Name":"Kevin Youkilis","Team":"BOS","Position":"First Baseman","Height(inches)":73,"Weight(lbs)":220,"Age":27} +{"Name":"Dustin Pedroia","Team":"BOS","Position":"Second Baseman","Height(inches)":69,"Weight(lbs)":180,"Age":23} +{"Name":"Alex Cora","Team":"BOS","Position":"Shortstop","Height(inches)":72,"Weight(lbs)":180,"Age":31} +{"Name":"Julio Lugo","Team":"BOS","Position":"Shortstop","Height(inches)":73,"Weight(lbs)":170,"Age":31} +{"Name":"Mike Lowell","Team":"BOS","Position":"Third Baseman","Height(inches)":75,"Weight(lbs)":210,"Age":33} +{"Name":"Wily Mo Pe?a","Team":"BOS","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":215,"Age":25} +{"Name":"J.D. 
Drew","Team":"BOS","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":200,"Age":31} +{"Name":"Manny Ramirez","Team":"BOS","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":213,"Age":34} +{"Name":"Brandon Moss","Team":"BOS","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":180,"Age":23} +{"Name":"David Murphy","Team":"BOS","Position":"Outfielder","Height(inches)":76,"Weight(lbs)":192,"Age":25} +{"Name":"Eric Hinske","Team":"BOS","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":235,"Age":29} +{"Name":"Coco Crisp","Team":"BOS","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":185,"Age":27} +{"Name":"David Ortiz","Team":"BOS","Position":"Designated Hitter","Height(inches)":76,"Weight(lbs)":230,"Age":31} +{"Name":"Curt Schilling","Team":"BOS","Position":"Starting Pitcher","Height(inches)":77,"Weight(lbs)":235,"Age":40} +{"Name":"Tim Wakefield","Team":"BOS","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":210,"Age":40} +{"Name":"Josh Beckett","Team":"BOS","Position":"Starting Pitcher","Height(inches)":77,"Weight(lbs)":222,"Age":26} +{"Name":"Matt Clement","Team":"BOS","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":210,"Age":32} +{"Name":"Jonathan Papelbon","Team":"BOS","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":230,"Age":26} +{"Name":"Kyle Snyder","Team":"BOS","Position":"Starting Pitcher","Height(inches)":80,"Weight(lbs)":220,"Age":29} +{"Name":"Devern Hansack","Team":"BOS","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":180,"Age":29} +{"Name":"Jon Lester","Team":"BOS","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":190,"Age":23} +{"Name":"Kason Gabbard","Team":"BOS","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":200,"Age":24} +{"Name":"Craig Hansen","Team":"BOS","Position":"Relief Pitcher","Height(inches)":78,"Weight(lbs)":210,"Age":23} +{"Name":"Hideki Okajima","Team":"BOS","Position":"Relief 
Pitcher","Height(inches)":73,"Weight(lbs)":194,"Age":31} +{"Name":"Craig Breslow","Team":"BOS","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":180,"Age":26} +{"Name":"Manny Delcarmen","Team":"BOS","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":190,"Age":25} +{"Name":"Brendan Donnelly","Team":"BOS","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":240,"Age":35} +{"Name":"Javier Lopez","Team":"BOS","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":200,"Age":29} +{"Name":"J.C. Romero","Team":"BOS","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":198,"Age":30} +{"Name":"Joel Pineiro","Team":"BOS","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":200,"Age":28} +{"Name":"Julian Tavarez","Team":"BOS","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":195,"Age":33} +{"Name":"Mike Timlin","Team":"BOS","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":210,"Age":40} +{"Name":"Nick DeBarr","Team":"BOS","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":220,"Age":23} +{"Name":"Victor Martinez","Team":"CLE","Position":"Catcher","Height(inches)":74,"Weight(lbs)":190,"Age":28} +{"Name":"Kelly Shoppach","Team":"CLE","Position":"Catcher","Height(inches)":73,"Weight(lbs)":210,"Age":26} +{"Name":"Ryan Garko","Team":"CLE","Position":"First Baseman","Height(inches)":74,"Weight(lbs)":225,"Age":26} +{"Name":"Joe Inglett","Team":"CLE","Position":"Second Baseman","Height(inches)":70,"Weight(lbs)":180,"Age":28} +{"Name":"Josh Barfield","Team":"CLE","Position":"Second Baseman","Height(inches)":72,"Weight(lbs)":185,"Age":24} +{"Name":"Hector Luna","Team":"CLE","Position":"Second Baseman","Height(inches)":73,"Weight(lbs)":170,"Age":27} +{"Name":"Jhonny Peralta","Team":"CLE","Position":"Shortstop","Height(inches)":73,"Weight(lbs)":185,"Age":24} +{"Name":"Andy Marte","Team":"CLE","Position":"Third Baseman","Height(inches)":73,"Weight(lbs)":185,"Age":23} +{"Name":"Ben 
Francisco","Team":"CLE","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":180,"Age":25} +{"Name":"Shin-Soo Choo","Team":"CLE","Position":"Outfielder","Height(inches)":71,"Weight(lbs)":178,"Age":24} +{"Name":"Franklin Gutierrez","Team":"CLE","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":175,"Age":24} +{"Name":"Grady Sizemore","Team":"CLE","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":200,"Age":24} +{"Name":"Jason Michaels","Team":"CLE","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":204,"Age":30} +{"Name":"Trot Nixon","Team":"CLE","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":211,"Age":32} +{"Name":"David Dellucci","Team":"CLE","Position":"Outfielder","Height(inches)":71,"Weight(lbs)":190,"Age":33} +{"Name":"Casey Blake","Team":"CLE","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":210,"Age":33} +{"Name":"Travis Hafner","Team":"CLE","Position":"Designated Hitter","Height(inches)":75,"Weight(lbs)":240,"Age":29} +{"Name":"Paul Byrd","Team":"CLE","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":190,"Age":36} +{"Name":"Cliff Lee","Team":"CLE","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":190,"Age":28} +{"Name":"Jake Westbrook","Team":"CLE","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":185,"Age":29} +{"Name":"C.C. 
Sabathia","Team":"CLE","Position":"Starting Pitcher","Height(inches)":79,"Weight(lbs)":290,"Age":26} +{"Name":"Jeremy Sowers","Team":"CLE","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":175,"Age":23} +{"Name":"Rafael Perez","Team":"CLE","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":185,"Age":24} +{"Name":"Brian Slocum","Team":"CLE","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":200,"Age":25} +{"Name":"Edward Mujica","Team":"CLE","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":220,"Age":22} +{"Name":"Fernando Cabrera","Team":"CLE","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":170,"Age":25} +{"Name":"Tom Mastny","Team":"CLE","Position":"Relief Pitcher","Height(inches)":78,"Weight(lbs)":220,"Age":26} +{"Name":"Juan Lara","Team":"CLE","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":190,"Age":26} +{"Name":"Fausto Carmona","Team":"CLE","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":220,"Age":23} +{"Name":"Aaron Fultz","Team":"CLE","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":205,"Age":33} +{"Name":"Rafael Betancourt","Team":"CLE","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":200,"Age":31} +{"Name":"Roberto Hernandez","Team":"CLE","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":250,"Age":42} +{"Name":"Joe Borowski","Team":"CLE","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":225,"Age":35} +{"Name":"Matt Miller","Team":"CLE","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":215,"Age":35} +{"Name":"Jason Davis","Team":"CLE","Position":"Relief Pitcher","Height(inches)":78,"Weight(lbs)":210,"Age":26} +{"Name":"Mike Piazza","Team":"OAK","Position":"Catcher","Height(inches)":75,"Weight(lbs)":215,"Age":38} +{"Name":"Jason Kendall","Team":"OAK","Position":"Catcher","Height(inches)":72,"Weight(lbs)":195,"Age":32} +{"Name":"Adam 
Melhuse","Team":"OAK","Position":"Catcher","Height(inches)":74,"Weight(lbs)":200,"Age":34} +{"Name":"Nick Swisher","Team":"OAK","Position":"First Baseman","Height(inches)":72,"Weight(lbs)":194,"Age":26} +{"Name":"Dan Johnson","Team":"OAK","Position":"First Baseman","Height(inches)":74,"Weight(lbs)":220,"Age":27} +{"Name":"Donald Murphy","Team":"OAK","Position":"Second Baseman","Height(inches)":70,"Weight(lbs)":180,"Age":23} +{"Name":"Mark Ellis","Team":"OAK","Position":"Second Baseman","Height(inches)":71,"Weight(lbs)":180,"Age":29} +{"Name":"Marco Scutaro","Team":"OAK","Position":"Shortstop","Height(inches)":70,"Weight(lbs)":170,"Age":31} +{"Name":"Bobby Crosby","Team":"OAK","Position":"Shortstop","Height(inches)":75,"Weight(lbs)":195,"Age":27} +{"Name":"Mark Kiger","Team":"OAK","Position":"Shortstop","Height(inches)":71,"Weight(lbs)":180,"Age":26} +{"Name":"Antonio Perez","Team":"OAK","Position":"Third Baseman","Height(inches)":71,"Weight(lbs)":170,"Age":27} +{"Name":"Eric Chavez","Team":"OAK","Position":"Third Baseman","Height(inches)":73,"Weight(lbs)":206,"Age":29} +{"Name":"Milton Bradley","Team":"OAK","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":205,"Age":28} +{"Name":"Shannon Stewart","Team":"OAK","Position":"Outfielder","Height(inches)":71,"Weight(lbs)":200,"Age":33} +{"Name":"Bobby Kielty","Team":"OAK","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":225,"Age":30} +{"Name":"Mark Kotsay","Team":"OAK","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":201,"Age":31} +{"Name":"Ryan Goleski","Team":"OAK","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":225,"Age":24} +{"Name":"Jeremy Brown","Team":"OAK","Position":"Designated Hitter","Height(inches)":70,"Weight(lbs)":226,"Age":27} +{"Name":"Jason Windsor","Team":"OAK","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":233,"Age":24} +{"Name":"David Shafer","Team":"OAK","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":180,"Age":24} +{"Name":"Joe 
Blanton","Team":"OAK","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":225,"Age":26} +{"Name":"Brad Halsey","Team":"OAK","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":180,"Age":26} +{"Name":"Dan Haren","Team":"OAK","Position":"Starting Pitcher","Height(inches)":77,"Weight(lbs)":220,"Age":26} +{"Name":"Rich Harden","Team":"OAK","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":180,"Age":25} +{"Name":"Joe Kennedy","Team":"OAK","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":237,"Age":27} +{"Name":"Esteban Loaiza","Team":"OAK","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":215,"Age":35} +{"Name":"Alan Embree","Team":"OAK","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":190,"Age":37} +{"Name":"Jay Witasick","Team":"OAK","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":235,"Age":34} +{"Name":"Justin Duchscherer","Team":"OAK","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":190,"Age":29} +{"Name":"Kiko Calero","Team":"OAK","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":180,"Age":32} +{"Name":"Chad Gaudin","Team":"OAK","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":165,"Age":23} +{"Name":"Lenny DiNardo","Team":"OAK","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":195,"Age":27} +{"Name":"Scott Dunn","Team":"OAK","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":200,"Age":28} +{"Name":"Huston Street","Team":"OAK","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":190,"Age":23} +{"Name":"Ron Flores","Team":"OAK","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":190,"Age":27} +{"Name":"Jay Marshall","Team":"OAK","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":185,"Age":24} +{"Name":"Marcus McBeth","Team":"OAK","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":185,"Age":26} +{"Name":"Jorge 
Posada","Team":"NYY","Position":"Catcher","Height(inches)":74,"Weight(lbs)":205,"Age":35} +{"Name":"Wil Nieves","Team":"NYY","Position":"Catcher","Height(inches)":71,"Weight(lbs)":190,"Age":29} +{"Name":"Andy Phillips","Team":"NYY","Position":"First Baseman","Height(inches)":72,"Weight(lbs)":205,"Age":29} +{"Name":"Doug Mientkiewicz","Team":"NYY","Position":"First Baseman","Height(inches)":74,"Weight(lbs)":206,"Age":32} +{"Name":"Josh Phelps","Team":"NYY","Position":"First Baseman","Height(inches)":75,"Weight(lbs)":220,"Age":28} +{"Name":"Miguel Cairo","Team":"NYY","Position":"Second Baseman","Height(inches)":73,"Weight(lbs)":208,"Age":32} +{"Name":"Robinson Cano","Team":"NYY","Position":"Second Baseman","Height(inches)":72,"Weight(lbs)":170,"Age":24} +{"Name":"Derek Jeter","Team":"NYY","Position":"Shortstop","Height(inches)":75,"Weight(lbs)":195,"Age":32} +{"Name":"Alex Rodriguez","Team":"NYY","Position":"Third Baseman","Height(inches)":75,"Weight(lbs)":210,"Age":31} +{"Name":"Johnny Damon","Team":"NYY","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":190,"Age":33} +{"Name":"Bobby Abreu","Team":"NYY","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":211,"Age":32} +{"Name":"Hideki Matsui","Team":"NYY","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":230,"Age":32} +{"Name":"Melky Cabrera","Team":"NYY","Position":"Outfielder","Height(inches)":71,"Weight(lbs)":170,"Age":22} +{"Name":"Kevin Thompson","Team":"NYY","Position":"Outfielder","Height(inches)":70,"Weight(lbs)":185,"Age":27} +{"Name":"Jason Giambi","Team":"NYY","Position":"Designated Hitter","Height(inches)":75,"Weight(lbs)":230,"Age":36} +{"Name":"Mike Mussina","Team":"NYY","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":185,"Age":38} +{"Name":"Carl Pavano","Team":"NYY","Position":"Starting Pitcher","Height(inches)":77,"Weight(lbs)":241,"Age":31} +{"Name":"Andy Pettitte","Team":"NYY","Position":"Starting Pitcher","Height(inches)":77,"Weight(lbs)":225,"Age":34} 
+{"Name":"Darrell Rasner","Team":"NYY","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":210,"Age":26} +{"Name":"Jeff Karstens","Team":"NYY","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":175,"Age":24} +{"Name":"Humberto Sanchez","Team":"NYY","Position":"Starting Pitcher","Height(inches)":78,"Weight(lbs)":230,"Age":23} +{"Name":"Chien-Ming Wang","Team":"NYY","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":200,"Age":26} +{"Name":"Sean Henn","Team":"NYY","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":215,"Age":25} +{"Name":"Scott Proctor","Team":"NYY","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":198,"Age":30} +{"Name":"Brian Bruney","Team":"NYY","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":226,"Age":25} +{"Name":"Chris Britton","Team":"NYY","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":278,"Age":24} +{"Name":"T.J. Beam","Team":"NYY","Position":"Relief Pitcher","Height(inches)":79,"Weight(lbs)":215,"Age":26} +{"Name":"Jose Veras","Team":"NYY","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":230,"Age":26} +{"Name":"Kyle Farnsworth","Team":"NYY","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":240,"Age":30} +{"Name":"Luis Vizcaino","Team":"NYY","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":184,"Age":32} +{"Name":"Mike Myers","Team":"NYY","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":219,"Age":37} +{"Name":"Mariano Rivera","Team":"NYY","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":170,"Age":37} +{"Name":"Ivan Rodriguez","Team":"DET","Position":"Catcher","Height(inches)":69,"Weight(lbs)":218,"Age":35} +{"Name":"Vance Wilson","Team":"DET","Position":"Catcher","Height(inches)":71,"Weight(lbs)":190,"Age":33} +{"Name":"Sean Casey","Team":"DET","Position":"First Baseman","Height(inches)":76,"Weight(lbs)":225,"Age":32} +{"Name":"Chris Shelton","Team":"DET","Position":"First 
Baseman","Height(inches)":72,"Weight(lbs)":220,"Age":26} +{"Name":"Omar Infante","Team":"DET","Position":"Second Baseman","Height(inches)":72,"Weight(lbs)":176,"Age":25} +{"Name":"Placido Polanco","Team":"DET","Position":"Second Baseman","Height(inches)":70,"Weight(lbs)":190,"Age":31} +{"Name":"Neifi Perez","Team":"DET","Position":"Second Baseman","Height(inches)":72,"Weight(lbs)":197,"Age":33} +{"Name":"Carlos Guillen","Team":"DET","Position":"Shortstop","Height(inches)":73,"Weight(lbs)":204,"Age":31} +{"Name":"Ramon Santiago","Team":"DET","Position":"Shortstop","Height(inches)":71,"Weight(lbs)":167,"Age":27} +{"Name":"Tony Giarratano","Team":"DET","Position":"Shortstop","Height(inches)":72,"Weight(lbs)":180,"Age":24} +{"Name":"Brandon Inge","Team":"DET","Position":"Third Baseman","Height(inches)":71,"Weight(lbs)":195,"Age":29} +{"Name":"Craig Monroe","Team":"DET","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":220,"Age":30} +{"Name":"Magglio Ordo?ez","Team":"DET","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":215,"Age":33} +{"Name":"Curtis Granderson","Team":"DET","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":185,"Age":25} +{"Name":"Brent Clevlen","Team":"DET","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":190,"Age":23} +{"Name":"Marcus Thames","Team":"DET","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":205,"Age":29} +{"Name":"Gary Sheffield","Team":"DET","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":205,"Age":38} +{"Name":"Mike Rabelo","Team":"DET","Position":"Designated Hitter","Height(inches)":73,"Weight(lbs)":200,"Age":27} +{"Name":"Zach Miner","Team":"DET","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":200,"Age":24} +{"Name":"Jeremy Bonderman","Team":"DET","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":210,"Age":24} +{"Name":"Nate Robertson","Team":"DET","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":215,"Age":29} +{"Name":"Justin 
Verlander","Team":"DET","Position":"Starting Pitcher","Height(inches)":77,"Weight(lbs)":200,"Age":24} +{"Name":"Virgil Vasquez","Team":"DET","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":205,"Age":24} +{"Name":"Kenny Rogers","Team":"DET","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":211,"Age":42} +{"Name":"Mike Maroth","Team":"DET","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":190,"Age":29} +{"Name":"Fernando Rodney","Team":"DET","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":208,"Age":29} +{"Name":"Chad Durbin","Team":"DET","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":200,"Age":29} +{"Name":"Jason Grilli","Team":"DET","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":210,"Age":30} +{"Name":"Jose Mesa","Team":"DET","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":232,"Age":40} +{"Name":"Todd Jones","Team":"DET","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":230,"Age":38} +{"Name":"Joel Zumaya","Team":"DET","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":210,"Age":22} +{"Name":"Jordan Tata","Team":"DET","Position":"Relief Pitcher","Height(inches)":78,"Weight(lbs)":220,"Age":25} +{"Name":"Andrew Miller","Team":"DET","Position":"Relief Pitcher","Height(inches)":78,"Weight(lbs)":210,"Age":21} +{"Name":"Yorman Bazardo","Team":"DET","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":202,"Age":22} +{"Name":"Wilfredo Ledezma","Team":"DET","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":212,"Age":26} +{"Name":"Roman Colon","Team":"DET","Position":"Relief Pitcher","Height(inches)":78,"Weight(lbs)":225,"Age":27} +{"Name":"Edward Campusano","Team":"DET","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":170,"Age":24} +{"Name":"Rene Rivera","Team":"SEA","Position":"Catcher","Height(inches)":70,"Weight(lbs)":190,"Age":23} +{"Name":"Kenji 
Johjima","Team":"SEA","Position":"Catcher","Height(inches)":72,"Weight(lbs)":200,"Age":30} +{"Name":"Richie Sexson","Team":"SEA","Position":"First Baseman","Height(inches)":80,"Weight(lbs)":237,"Age":32} +{"Name":"Ben Broussard","Team":"SEA","Position":"First Baseman","Height(inches)":74,"Weight(lbs)":220,"Age":30} +{"Name":"Jose Lopez","Team":"SEA","Position":"Second Baseman","Height(inches)":74,"Weight(lbs)":170,"Age":23} +{"Name":"Jose Vidro","Team":"SEA","Position":"Second Baseman","Height(inches)":71,"Weight(lbs)":193,"Age":32} +{"Name":"Yuniesky Betancourt","Team":"SEA","Position":"Shortstop","Height(inches)":70,"Weight(lbs)":190,"Age":25} +{"Name":"Oswaldo Navarro","Team":"SEA","Position":"Shortstop","Height(inches)":72,"Weight(lbs)":150,"Age":22} +{"Name":"Adrian Beltre","Team":"SEA","Position":"Third Baseman","Height(inches)":71,"Weight(lbs)":220,"Age":27} +{"Name":"Raul Ibanez","Team":"SEA","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":200,"Age":34} +{"Name":"Jose Guillen","Team":"SEA","Position":"Outfielder","Height(inches)":71,"Weight(lbs)":190,"Age":30} +{"Name":"Jeremy Reed","Team":"SEA","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":185,"Age":25} +{"Name":"Willie Bloomquist","Team":"SEA","Position":"Outfielder","Height(inches)":71,"Weight(lbs)":185,"Age":29} +{"Name":"Adam Jones","Team":"SEA","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":200,"Age":21} +{"Name":"Ichiro Suzuki","Team":"SEA","Position":"Outfielder","Height(inches)":69,"Weight(lbs)":172,"Age":33} +{"Name":"Mike Morse","Team":"SEA","Position":"Outfielder","Height(inches)":76,"Weight(lbs)":220,"Age":24} +{"Name":"Felix Hernandez","Team":"SEA","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":225,"Age":20} +{"Name":"Ryan Feierabend","Team":"SEA","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":190,"Age":21} +{"Name":"Sean White","Team":"SEA","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":195,"Age":25} 
+{"Name":"Horacio Ramirez","Team":"SEA","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":219,"Age":27} +{"Name":"Cha Baek","Team":"SEA","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":190,"Age":26} +{"Name":"Miguel Batista","Team":"SEA","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":197,"Age":36} +{"Name":"Jeff Weaver","Team":"SEA","Position":"Starting Pitcher","Height(inches)":77,"Weight(lbs)":200,"Age":30} +{"Name":"Jarrod Washburn","Team":"SEA","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":195,"Age":32} +{"Name":"George Sherrill","Team":"SEA","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":210,"Age":29} +{"Name":"Julio Mateo","Team":"SEA","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":177,"Age":29} +{"Name":"J.J. Putz","Team":"SEA","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":220,"Age":30} +{"Name":"Chris Reitsma","Team":"SEA","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":235,"Age":29} +{"Name":"Cesar Jimenez","Team":"SEA","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":180,"Age":22} +{"Name":"Eric O'Flaherty","Team":"SEA","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":195,"Age":22} +{"Name":"Jon Huber","Team":"SEA","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":195,"Age":25} +{"Name":"Jake Woods","Team":"SEA","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":190,"Age":25} +{"Name":"Sean Green","Team":"SEA","Position":"Relief Pitcher","Height(inches)":78,"Weight(lbs)":230,"Age":27} +{"Name":"Mark Lowe","Team":"SEA","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":190,"Age":23} +{"Name":"Josh Paul","Team":"TB","Position":"Catcher","Height(inches)":73,"Weight(lbs)":200,"Age":31} +{"Name":"Dioner Navarro","Team":"TB","Position":"Catcher","Height(inches)":70,"Weight(lbs)":190,"Age":23} +{"Name":"Shawn 
Riggans","Team":"TB","Position":"Catcher","Height(inches)":74,"Weight(lbs)":190,"Age":26} +{"Name":"Ty Wigginton","Team":"TB","Position":"First Baseman","Height(inches)":72,"Weight(lbs)":200,"Age":29} +{"Name":"Brendan Harris","Team":"TB","Position":"Second Baseman","Height(inches)":73,"Weight(lbs)":200,"Age":26} +{"Name":"Jorge Cantu","Team":"TB","Position":"Second Baseman","Height(inches)":73,"Weight(lbs)":184,"Age":25} +{"Name":"Ben Zobrist","Team":"TB","Position":"Shortstop","Height(inches)":75,"Weight(lbs)":200,"Age":25} +{"Name":"B.J. Upton","Team":"TB","Position":"Third Baseman","Height(inches)":75,"Weight(lbs)":180,"Age":22} +{"Name":"Carl Crawford","Team":"TB","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":219,"Age":25} +{"Name":"Rocco Baldelli","Team":"TB","Position":"Outfielder","Height(inches)":76,"Weight(lbs)":187,"Age":25} +{"Name":"Greg Norton","Team":"TB","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":200,"Age":34} +{"Name":"Elijah Dukes","Team":"TB","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":220,"Age":22} +{"Name":"Delmon Young","Team":"TB","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":205,"Age":21} +{"Name":"Jonny Gomes","Team":"TB","Position":"Designated Hitter","Height(inches)":73,"Weight(lbs)":205,"Age":26} +{"Name":"Edwin Jackson","Team":"TB","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":190,"Age":23} +{"Name":"Scott Kazmir","Team":"TB","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":170,"Age":23} +{"Name":"Casey Fossum","Team":"TB","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":160,"Age":29} +{"Name":"Jae Seo","Team":"TB","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":215,"Age":29} +{"Name":"J.P. 
Howell","Team":"TB","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":175,"Age":23} +{"Name":"Tim Corcoran","Team":"TB","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":205,"Age":28} +{"Name":"Jason Hammel","Team":"TB","Position":"Starting Pitcher","Height(inches)":78,"Weight(lbs)":200,"Age":24} +{"Name":"James Shields","Team":"TB","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":214,"Age":25} +{"Name":"Brian Stokes","Team":"TB","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":200,"Age":27} +{"Name":"Juan Salas","Team":"TB","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":190,"Age":28} +{"Name":"Jeff Ridgway","Team":"TB","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":180,"Age":26} +{"Name":"Ruddy Lugo","Team":"TB","Position":"Relief Pitcher","Height(inches)":70,"Weight(lbs)":205,"Age":26} +{"Name":"Jae-Kuk Ryu","Team":"TB","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":220,"Age":23} +{"Name":"Chad Orvella","Team":"TB","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":190,"Age":26} +{"Name":"Dan Miceli","Team":"TB","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":215,"Age":36} +{"Name":"Seth McClung","Team":"TB","Position":"Relief Pitcher","Height(inches)":78,"Weight(lbs)":235,"Age":26} +{"Name":"Jon Switzer","Team":"TB","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":191,"Age":27} +{"Name":"Shawn Camp","Team":"TB","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":200,"Age":31} +{"Name":"Scott Dohmann","Team":"TB","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":181,"Age":29} +{"Name":"Jason LaRue","Team":"KC","Position":"Catcher","Height(inches)":71,"Weight(lbs)":200,"Age":32} +{"Name":"John Buck","Team":"KC","Position":"Catcher","Height(inches)":75,"Weight(lbs)":210,"Age":26} +{"Name":"Ryan Shealy","Team":"KC","Position":"First Baseman","Height(inches)":77,"Weight(lbs)":240,"Age":27} +{"Name":"Ross 
Gload","Team":"KC","Position":"First Baseman","Height(inches)":72,"Weight(lbs)":185,"Age":30} +{"Name":"Esteban German","Team":"KC","Position":"Second Baseman","Height(inches)":69,"Weight(lbs)":165,"Age":29} +{"Name":"Mark Grudzielanek","Team":"KC","Position":"Second Baseman","Height(inches)":73,"Weight(lbs)":190,"Age":36} +{"Name":"Angel Sanchez","Team":"KC","Position":"Second Baseman","Height(inches)":74,"Weight(lbs)":185,"Age":23} +{"Name":"Angel Berroa","Team":"KC","Position":"Shortstop","Height(inches)":72,"Weight(lbs)":175,"Age":29} +{"Name":"Andres Blanco","Team":"KC","Position":"Shortstop","Height(inches)":70,"Weight(lbs)":155,"Age":22} +{"Name":"Mark Teahen","Team":"KC","Position":"Third Baseman","Height(inches)":75,"Weight(lbs)":210,"Age":25} +{"Name":"Joey Gathright","Team":"KC","Position":"Outfielder","Height(inches)":70,"Weight(lbs)":170,"Age":25} +{"Name":"David DeJesus","Team":"KC","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":175,"Age":27} +{"Name":"Shane Costa","Team":"KC","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":220,"Age":25} +{"Name":"Mitch Maier","Team":"KC","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":210,"Age":24} +{"Name":"Reggie Sanders","Team":"KC","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":205,"Age":39} +{"Name":"Emil Brown","Team":"KC","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":200,"Age":32} +{"Name":"Mike Sweeney","Team":"KC","Position":"Designated Hitter","Height(inches)":75,"Weight(lbs)":225,"Age":33} +{"Name":"John Bale","Team":"KC","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":205,"Age":32} +{"Name":"Luke Hudson","Team":"KC","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":195,"Age":29} +{"Name":"Scott Elarton","Team":"KC","Position":"Starting Pitcher","Height(inches)":80,"Weight(lbs)":240,"Age":31} +{"Name":"Odalis Perez","Team":"KC","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":150,"Age":29} +{"Name":"Gil 
Meche","Team":"KC","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":200,"Age":28} +{"Name":"Neal Musser","Team":"KC","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":215,"Age":26} +{"Name":"Brian Bannister","Team":"KC","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":202,"Age":26} +{"Name":"Zack Greinke","Team":"KC","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":200,"Age":23} +{"Name":"Jorge De La Rosa","Team":"KC","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":190,"Age":25} +{"Name":"Todd Wellemeyer","Team":"KC","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":205,"Age":28} +{"Name":"Jimmy Gobble","Team":"KC","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":190,"Age":25} +{"Name":"Joel Peralta","Team":"KC","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":160,"Age":30} +{"Name":"Ryan Braun","Team":"KC","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":215,"Age":26} +{"Name":"Joakim Soria","Team":"KC","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":185,"Age":22} +{"Name":"Ken Ray","Team":"KC","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":200,"Age":32} +{"Name":"David Riske","Team":"KC","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":190,"Age":30} +{"Name":"Octavio Dotel","Team":"KC","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":210,"Age":33} +{"Name":"Joe Nelson","Team":"KC","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":185,"Age":32} +{"Name":"Gerald Laird","Team":"TEX","Position":"Catcher","Height(inches)":74,"Weight(lbs)":220,"Age":27} +{"Name":"Miguel Ojeda","Team":"TEX","Position":"Catcher","Height(inches)":74,"Weight(lbs)":190,"Age":32} +{"Name":"Guillermo Quiroz","Team":"TEX","Position":"Catcher","Height(inches)":73,"Weight(lbs)":202,"Age":25} +{"Name":"Chris Stewart","Team":"TEX","Position":"Catcher","Height(inches)":76,"Weight(lbs)":205,"Age":25} 
+{"Name":"Mark Teixeira","Team":"TEX","Position":"First Baseman","Height(inches)":75,"Weight(lbs)":220,"Age":26} +{"Name":"Ian Kinsler","Team":"TEX","Position":"Second Baseman","Height(inches)":72,"Weight(lbs)":175,"Age":24} +{"Name":"Joaquin Arias","Team":"TEX","Position":"Shortstop","Height(inches)":73,"Weight(lbs)":160,"Age":22} +{"Name":"Michael Young","Team":"TEX","Position":"Shortstop","Height(inches)":73,"Weight(lbs)":190,"Age":30} +{"Name":"Hank Blalock","Team":"TEX","Position":"Third Baseman","Height(inches)":73,"Weight(lbs)":200,"Age":26} +{"Name":"Marlon Byrd","Team":"TEX","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":229,"Age":29} +{"Name":"Brad Wilkerson","Team":"TEX","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":206,"Age":29} +{"Name":"Sammy Sosa","Team":"TEX","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":220,"Age":38} +{"Name":"Kenny Lofton","Team":"TEX","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":180,"Age":39} +{"Name":"Frank Catalanotto","Team":"TEX","Position":"Outfielder","Height(inches)":71,"Weight(lbs)":195,"Age":32} +{"Name":"Nelson Cruz","Team":"TEX","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":175,"Age":26} +{"Name":"Jason Botts","Team":"TEX","Position":"Designated Hitter","Height(inches)":77,"Weight(lbs)":250,"Age":26} +{"Name":"Robinson Tejeda","Team":"TEX","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":188,"Age":24} +{"Name":"John Rheinecker","Team":"TEX","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":230,"Age":27} +{"Name":"Edinson Volquez","Team":"TEX","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":190,"Age":23} +{"Name":"A.J. 
Murray","Team":"TEX","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":200,"Age":24} +{"Name":"Brandon McCarthy","Team":"TEX","Position":"Starting Pitcher","Height(inches)":79,"Weight(lbs)":190,"Age":23} +{"Name":"Vicente Padilla","Team":"TEX","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":219,"Age":29} +{"Name":"Kevin Millwood","Team":"TEX","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":235,"Age":32} +{"Name":"John Koronka","Team":"TEX","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":180,"Age":26} +{"Name":"Frank Francisco","Team":"TEX","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":180,"Age":27} +{"Name":"Francisco Cruceta","Team":"TEX","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":180,"Age":25} +{"Name":"Akinori Otsuka","Team":"TEX","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":200,"Age":35} +{"Name":"Eric Gagne","Team":"TEX","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":234,"Age":31} +{"Name":"Ron Mahay","Team":"TEX","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":185,"Age":35} +{"Name":"Joaquin Benoit","Team":"TEX","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":220,"Age":29} +{"Name":"Rick Bauer","Team":"TEX","Position":"Relief Pitcher","Height(inches)":78,"Weight(lbs)":223,"Age":30} +{"Name":"Josh Rupe","Team":"TEX","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":200,"Age":24} +{"Name":"Wes Littleton","Team":"TEX","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":210,"Age":24} +{"Name":"C.J. 
Wilson","Team":"TEX","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":200,"Age":26} +{"Name":"Scott Feldman","Team":"TEX","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":210,"Age":24} +{"Name":"Gregg Zaun","Team":"TOR","Position":"Catcher","Height(inches)":70,"Weight(lbs)":190,"Age":35} +{"Name":"Jason Phillips","Team":"TOR","Position":"Catcher","Height(inches)":73,"Weight(lbs)":177,"Age":30} +{"Name":"Lyle Overbay","Team":"TOR","Position":"First Baseman","Height(inches)":74,"Weight(lbs)":227,"Age":30} +{"Name":"Russ Adams","Team":"TOR","Position":"Second Baseman","Height(inches)":73,"Weight(lbs)":180,"Age":26} +{"Name":"Aaron Hill","Team":"TOR","Position":"Second Baseman","Height(inches)":71,"Weight(lbs)":195,"Age":24} +{"Name":"Jason Smith","Team":"TOR","Position":"Second Baseman","Height(inches)":75,"Weight(lbs)":199,"Age":29} +{"Name":"John McDonald","Team":"TOR","Position":"Shortstop","Height(inches)":71,"Weight(lbs)":175,"Age":32} +{"Name":"Royce Clayton","Team":"TOR","Position":"Shortstop","Height(inches)":72,"Weight(lbs)":185,"Age":37} +{"Name":"Troy Glaus","Team":"TOR","Position":"Third Baseman","Height(inches)":77,"Weight(lbs)":240,"Age":30} +{"Name":"John Hattig","Team":"TOR","Position":"Third Baseman","Height(inches)":74,"Weight(lbs)":210,"Age":27} +{"Name":"Reed Johnson","Team":"TOR","Position":"Outfielder","Height(inches)":70,"Weight(lbs)":180,"Age":30} +{"Name":"Alex Rios","Team":"TOR","Position":"Outfielder","Height(inches)":77,"Weight(lbs)":194,"Age":26} +{"Name":"Vernon Wells","Team":"TOR","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":225,"Age":28} +{"Name":"Frank Thomas","Team":"TOR","Position":"Designated Hitter","Height(inches)":77,"Weight(lbs)":275,"Age":38} +{"Name":"Adam Lind","Team":"TOR","Position":"Designated Hitter","Height(inches)":74,"Weight(lbs)":195,"Age":23} +{"Name":"Shaun Marcum","Team":"TOR","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":180,"Age":25} +{"Name":"Casey 
Janssen","Team":"TOR","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":205,"Age":25} +{"Name":"Gustavo Chacin","Team":"TOR","Position":"Starting Pitcher","Height(inches)":71,"Weight(lbs)":193,"Age":26} +{"Name":"A.J. Burnett","Team":"TOR","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":230,"Age":30} +{"Name":"Roy Halladay","Team":"TOR","Position":"Starting Pitcher","Height(inches)":78,"Weight(lbs)":230,"Age":29} +{"Name":"John Thomson","Team":"TOR","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":220,"Age":33} +{"Name":"Tomo Ohka","Team":"TOR","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":200,"Age":30} +{"Name":"B.J. Ryan","Team":"TOR","Position":"Relief Pitcher","Height(inches)":78,"Weight(lbs)":249,"Age":31} +{"Name":"Scott Downs","Team":"TOR","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":190,"Age":30} +{"Name":"Brian Tallet","Team":"TOR","Position":"Relief Pitcher","Height(inches)":79,"Weight(lbs)":208,"Age":29} +{"Name":"Matt Roney","Team":"TOR","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":245,"Age":27} +{"Name":"Tracy Thorpe","Team":"TOR","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":250,"Age":26} +{"Name":"Jean Machi","Team":"TOR","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":160,"Age":24} +{"Name":"Brandon League","Team":"TOR","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":192,"Age":23} +{"Name":"Dustin McGowan","Team":"TOR","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":220,"Age":24} +{"Name":"Jason Frasor","Team":"TOR","Position":"Relief Pitcher","Height(inches)":70,"Weight(lbs)":170,"Age":29} +{"Name":"Francisco Rosario","Team":"TOR","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":197,"Age":26} +{"Name":"Davis Romero","Team":"TOR","Position":"Relief Pitcher","Height(inches)":70,"Weight(lbs)":155,"Age":23} +{"Name":"Jeremy Accardo","Team":"TOR","Position":"Relief 
Pitcher","Height(inches)":74,"Weight(lbs)":190,"Age":25} +{"Name":"Mike Redmond","Team":"MIN","Position":"Catcher","Height(inches)":71,"Weight(lbs)":200,"Age":35} +{"Name":"Joe Mauer","Team":"MIN","Position":"Catcher","Height(inches)":76,"Weight(lbs)":220,"Age":23} +{"Name":"Chris Heintz","Team":"MIN","Position":"Catcher","Height(inches)":73,"Weight(lbs)":210,"Age":32} +{"Name":"Justin Morneau","Team":"MIN","Position":"First Baseman","Height(inches)":76,"Weight(lbs)":228,"Age":25} +{"Name":"Luis Castillo","Team":"MIN","Position":"Second Baseman","Height(inches)":71,"Weight(lbs)":190,"Age":31} +{"Name":"Alexi Casilla","Team":"MIN","Position":"Second Baseman","Height(inches)":69,"Weight(lbs)":160,"Age":22} +{"Name":"Alejandro Machado","Team":"MIN","Position":"Second Baseman","Height(inches)":72,"Weight(lbs)":184,"Age":24} +{"Name":"Jason Bartlett","Team":"MIN","Position":"Shortstop","Height(inches)":72,"Weight(lbs)":180,"Age":27} +{"Name":"Luis Rodriguez","Team":"MIN","Position":"Third Baseman","Height(inches)":69,"Weight(lbs)":180,"Age":26} +{"Name":"Jeff Cirillo","Team":"MIN","Position":"Third Baseman","Height(inches)":73,"Weight(lbs)":200,"Age":37} +{"Name":"Nick Punto","Team":"MIN","Position":"Third Baseman","Height(inches)":69,"Weight(lbs)":176,"Age":29} +{"Name":"Jason Tyner","Team":"MIN","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":160,"Age":29} +{"Name":"Michael Cuddyer","Team":"MIN","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":222,"Age":27} +{"Name":"Torii Hunter","Team":"MIN","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":211,"Age":31} +{"Name":"Lew Ford","Team":"MIN","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":195,"Age":30} +{"Name":"Jason Kubel","Team":"MIN","Position":"Outfielder","Height(inches)":71,"Weight(lbs)":200,"Age":24} +{"Name":"Josh Rabe","Team":"MIN","Position":"Designated Hitter","Height(inches)":74,"Weight(lbs)":210,"Age":28} +{"Name":"Rondell White","Team":"MIN","Position":"Designated 
Hitter","Height(inches)":73,"Weight(lbs)":225,"Age":35} +{"Name":"Ramon Ortiz","Team":"MIN","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":175,"Age":33} +{"Name":"Johan Santana","Team":"MIN","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":206,"Age":27} +{"Name":"Carlos Silva","Team":"MIN","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":240,"Age":27} +{"Name":"Matt Garza","Team":"MIN","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":185,"Age":23} +{"Name":"Boof Bonser","Team":"MIN","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":260,"Age":25} +{"Name":"Francisco Liriano","Team":"MIN","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":185,"Age":23} +{"Name":"Scott Baker","Team":"MIN","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":221,"Age":25} +{"Name":"Pat Neshek","Team":"MIN","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":205,"Age":26} +{"Name":"Glen Perkins","Team":"MIN","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":200,"Age":24} +{"Name":"Julio DePaula","Team":"MIN","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":170,"Age":24} +{"Name":"Juan Rincon","Team":"MIN","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":201,"Age":28} +{"Name":"Jesse Crain","Team":"MIN","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":205,"Age":25} +{"Name":"Matt Guerrier","Team":"MIN","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":185,"Age":28} +{"Name":"Joe Nathan","Team":"MIN","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":205,"Age":32} +{"Name":"Dennys Reyes","Team":"MIN","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":245,"Age":29} +{"Name":"Brayan Pe?a","Team":"ATL","Position":"Catcher","Height(inches)":71,"Weight(lbs)":220,"Age":25} +{"Name":"Brian McCann","Team":"ATL","Position":"Catcher","Height(inches)":75,"Weight(lbs)":210,"Age":23} +{"Name":"Craig 
Wilson","Team":"ATL","Position":"First Baseman","Height(inches)":74,"Weight(lbs)":220,"Age":30} +{"Name":"Chris Woodward","Team":"ATL","Position":"Second Baseman","Height(inches)":72,"Weight(lbs)":185,"Age":30} +{"Name":"Pete Orr","Team":"ATL","Position":"Second Baseman","Height(inches)":73,"Weight(lbs)":175,"Age":27} +{"Name":"Martin Prado","Team":"ATL","Position":"Second Baseman","Height(inches)":73,"Weight(lbs)":170,"Age":23} +{"Name":"Tony Pe?a","Team":"ATL","Position":"Shortstop","Height(inches)":73,"Weight(lbs)":180,"Age":25} +{"Name":"Edgar Renteria","Team":"ATL","Position":"Shortstop","Height(inches)":73,"Weight(lbs)":200,"Age":31} +{"Name":"Chipper Jones","Team":"ATL","Position":"Third Baseman","Height(inches)":76,"Weight(lbs)":210,"Age":34} +{"Name":"Willy Aybar","Team":"ATL","Position":"Third Baseman","Height(inches)":72,"Weight(lbs)":175,"Age":23} +{"Name":"Jeff Francoeur","Team":"ATL","Position":"Outfielder","Height(inches)":76,"Weight(lbs)":220,"Age":23} +{"Name":"Matt Diaz","Team":"ATL","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":206,"Age":28} +{"Name":"Kelly Johnson","Team":"ATL","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":180,"Age":25} +{"Name":"Andruw Jones","Team":"ATL","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":210,"Age":29} +{"Name":"Ryan Langerhans","Team":"ATL","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":195,"Age":27} +{"Name":"Scott Thorman","Team":"ATL","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":200,"Age":25} +{"Name":"T.J. 
Bohn","Team":"ATL","Position":"Outfielder","Height(inches)":77,"Weight(lbs)":200,"Age":27} +{"Name":"Tim Hudson","Team":"ATL","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":164,"Age":31} +{"Name":"Jonathan Johnson","Team":"ATL","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":180,"Age":32} +{"Name":"John Smoltz","Team":"ATL","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":220,"Age":39} +{"Name":"Mike Hampton","Team":"ATL","Position":"Starting Pitcher","Height(inches)":70,"Weight(lbs)":195,"Age":34} +{"Name":"Kyle Davies","Team":"ATL","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":205,"Age":23} +{"Name":"Chuck James","Team":"ATL","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":170,"Age":25} +{"Name":"Phil Stockman","Team":"ATL","Position":"Relief Pitcher","Height(inches)":80,"Weight(lbs)":240,"Age":27} +{"Name":"Macay McBride","Team":"ATL","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":210,"Age":24} +{"Name":"Joey Devine","Team":"ATL","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":195,"Age":23} +{"Name":"Peter Moylan","Team":"ATL","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":200,"Age":28} +{"Name":"Mike Gonzalez","Team":"ATL","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":205,"Age":28} +{"Name":"Lance Cormier","Team":"ATL","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":192,"Age":26} +{"Name":"Blaine Boyer","Team":"ATL","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":190,"Age":25} +{"Name":"Manny Acosta","Team":"ATL","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":170,"Age":25} +{"Name":"Bob Wickman","Team":"ATL","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":240,"Age":38} +{"Name":"Tanyon Sturtze","Team":"ATL","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":200,"Age":36} +{"Name":"Oscar Villarreal","Team":"ATL","Position":"Relief 
Pitcher","Height(inches)":72,"Weight(lbs)":205,"Age":25} +{"Name":"Rafael Soriano","Team":"ATL","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":175,"Age":27} +{"Name":"Chad Paronto","Team":"ATL","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":250,"Age":31} +{"Name":"Tyler Yates","Team":"ATL","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":220,"Age":29} +{"Name":"Henry Blanco","Team":"CHC","Position":"Catcher","Height(inches)":71,"Weight(lbs)":224,"Age":35} +{"Name":"Michael Barrett","Team":"CHC","Position":"Catcher","Height(inches)":75,"Weight(lbs)":210,"Age":30} +{"Name":"Geovany Soto","Team":"CHC","Position":"Catcher","Height(inches)":73,"Weight(lbs)":195,"Age":24} +{"Name":"Scott Moore","Team":"CHC","Position":"First Baseman","Height(inches)":74,"Weight(lbs)":180,"Age":23} +{"Name":"Derrek Lee","Team":"CHC","Position":"First Baseman","Height(inches)":77,"Weight(lbs)":245,"Age":31} +{"Name":"Ryan Theriot","Team":"CHC","Position":"Second Baseman","Height(inches)":71,"Weight(lbs)":175,"Age":27} +{"Name":"Ronny Cedeno","Team":"CHC","Position":"Shortstop","Height(inches)":72,"Weight(lbs)":180,"Age":24} +{"Name":"Aramis Ramirez","Team":"CHC","Position":"Third Baseman","Height(inches)":73,"Weight(lbs)":215,"Age":28} +{"Name":"Cesar Izturis","Team":"CHC","Position":"Third Baseman","Height(inches)":69,"Weight(lbs)":175,"Age":27} +{"Name":"Alfonso Soriano","Team":"CHC","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":180,"Age":31} +{"Name":"Jacque Jones","Team":"CHC","Position":"Outfielder","Height(inches)":70,"Weight(lbs)":195,"Age":31} +{"Name":"Daryle Ward","Team":"CHC","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":230,"Age":31} +{"Name":"Cliff Floyd","Team":"CHC","Position":"Outfielder","Height(inches)":76,"Weight(lbs)":230,"Age":34} +{"Name":"Mark DeRosa","Team":"CHC","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":205,"Age":32} +{"Name":"Matt 
Murton","Team":"CHC","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":215,"Age":25} +{"Name":"Buck Coats","Team":"CHC","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":195,"Age":24} +{"Name":"Angel Pagan","Team":"CHC","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":180,"Age":25} +{"Name":"Sean Marshall","Team":"CHC","Position":"Starting Pitcher","Height(inches)":79,"Weight(lbs)":205,"Age":24} +{"Name":"Carlos Marmol","Team":"CHC","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":180,"Age":24} +{"Name":"Ryan O'Malley","Team":"CHC","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":190,"Age":26} +{"Name":"Juan Mateo","Team":"CHC","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":180,"Age":24} +{"Name":"Rich Hill","Team":"CHC","Position":"Starting Pitcher","Height(inches)":77,"Weight(lbs)":190,"Age":26} +{"Name":"Angel Guzman","Team":"CHC","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":190,"Age":25} +{"Name":"Wade Miller","Team":"CHC","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":220,"Age":30} +{"Name":"Jason Marquis","Team":"CHC","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":210,"Age":28} +{"Name":"Carlos Zambrano","Team":"CHC","Position":"Starting Pitcher","Height(inches)":77,"Weight(lbs)":255,"Age":25} +{"Name":"Ted Lilly","Team":"CHC","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":190,"Age":31} +{"Name":"Mark Prior","Team":"CHC","Position":"Starting Pitcher","Height(inches)":77,"Weight(lbs)":230,"Age":26} +{"Name":"Neal Cotts","Team":"CHC","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":200,"Age":26} +{"Name":"Will Ohman","Team":"CHC","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":205,"Age":29} +{"Name":"Scott Eyre","Team":"CHC","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":210,"Age":34} +{"Name":"Kerry Wood","Team":"CHC","Position":"Relief 
Pitcher","Height(inches)":77,"Weight(lbs)":225,"Age":29} +{"Name":"Ryan Dempster","Team":"CHC","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":215,"Age":29} +{"Name":"Bob Howry","Team":"CHC","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":220,"Age":33} +{"Name":"Mike Wuertz","Team":"CHC","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":205,"Age":28} +{"Name":"Roberto Novoa","Team":"CHC","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":200,"Age":27} +{"Name":"Chris Snyder","Team":"ARZ","Position":"Catcher","Height(inches)":75,"Weight(lbs)":220,"Age":26} +{"Name":"Miguel Montero","Team":"ARZ","Position":"Catcher","Height(inches)":71,"Weight(lbs)":197,"Age":23} +{"Name":"Conor Jackson","Team":"ARZ","Position":"First Baseman","Height(inches)":74,"Weight(lbs)":225,"Age":24} +{"Name":"Robby Hammock","Team":"ARZ","Position":"First Baseman","Height(inches)":70,"Weight(lbs)":187,"Age":29} +{"Name":"Tony Clark","Team":"ARZ","Position":"First Baseman","Height(inches)":79,"Weight(lbs)":245,"Age":34} +{"Name":"Orlando Hudson","Team":"ARZ","Position":"Second Baseman","Height(inches)":72,"Weight(lbs)":185,"Age":29} +{"Name":"Stephen Drew","Team":"ARZ","Position":"Shortstop","Height(inches)":72,"Weight(lbs)":185,"Age":23} +{"Name":"Alberto Callaspo","Team":"ARZ","Position":"Shortstop","Height(inches)":70,"Weight(lbs)":175,"Age":23} +{"Name":"Chad Tracy","Team":"ARZ","Position":"Third Baseman","Height(inches)":74,"Weight(lbs)":200,"Age":26} +{"Name":"Chris Young","Team":"ARZ","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":180,"Age":23} +{"Name":"Scott Hairston","Team":"ARZ","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":188,"Age":26} +{"Name":"Carlos Quentin","Team":"ARZ","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":225,"Age":24} +{"Name":"Jeff DaVanon","Team":"ARZ","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":200,"Age":33} +{"Name":"Eric 
Byrnes","Team":"ARZ","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":210,"Age":31} +{"Name":"Livan Hernandez","Team":"ARZ","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":245,"Age":32} +{"Name":"Doug Davis","Team":"ARZ","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":213,"Age":31} +{"Name":"Randy Johnson","Team":"ARZ","Position":"Starting Pitcher","Height(inches)":82,"Weight(lbs)":231,"Age":43} +{"Name":"Juan Cruz","Team":"ARZ","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":165,"Age":28} +{"Name":"Brandon Webb","Team":"ARZ","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":228,"Age":27} +{"Name":"Enrique Gonzalez","Team":"ARZ","Position":"Starting Pitcher","Height(inches)":70,"Weight(lbs)":210,"Age":24} +{"Name":"Dana Eveland","Team":"ARZ","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":250,"Age":23} +{"Name":"Brandon Medders","Team":"ARZ","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":191,"Age":27} +{"Name":"Tony Pe?a","Team":"ARZ","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":190,"Age":25} +{"Name":"Doug Slaten","Team":"ARZ","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":200,"Age":27} +{"Name":"Edgar Gonzalez","Team":"ARZ","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":215,"Age":24} +{"Name":"Jose Valverde","Team":"ARZ","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":254,"Age":27} +{"Name":"Jorge Julio","Team":"ARZ","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":232,"Age":27} +{"Name":"Brandon Lyon","Team":"ARZ","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":180,"Age":27} +{"Name":"Miguel Olivo","Team":"FLA","Position":"Catcher","Height(inches)":72,"Weight(lbs)":215,"Age":28} +{"Name":"Matt Treanor","Team":"FLA","Position":"Catcher","Height(inches)":74,"Weight(lbs)":220,"Age":30} +{"Name":"Mike Jacobs","Team":"FLA","Position":"First 
Baseman","Height(inches)":74,"Weight(lbs)":180,"Age":26} +{"Name":"Dan Uggla","Team":"FLA","Position":"Second Baseman","Height(inches)":71,"Weight(lbs)":200,"Age":26} +{"Name":"Robert Andino","Team":"FLA","Position":"Shortstop","Height(inches)":72,"Weight(lbs)":170,"Age":22} +{"Name":"Hanley Ramirez","Team":"FLA","Position":"Shortstop","Height(inches)":75,"Weight(lbs)":195,"Age":23} +{"Name":"Miguel Cabrera","Team":"FLA","Position":"Third Baseman","Height(inches)":74,"Weight(lbs)":210,"Age":23} +{"Name":"Aaron Boone","Team":"FLA","Position":"Third Baseman","Height(inches)":74,"Weight(lbs)":200,"Age":33} +{"Name":"Joe Borchard","Team":"FLA","Position":"Outfielder","Height(inches)":77,"Weight(lbs)":220,"Age":28} +{"Name":"Alfredo Amezaga","Team":"FLA","Position":"Outfielder","Height(inches)":70,"Weight(lbs)":165,"Age":29} +{"Name":"Cody Ross","Team":"FLA","Position":"Outfielder","Height(inches)":71,"Weight(lbs)":180,"Age":26} +{"Name":"Josh Willingham","Team":"FLA","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":200,"Age":28} +{"Name":"Jeremy Hermida","Team":"FLA","Position":"Outfielder","Height(inches)":76,"Weight(lbs)":200,"Age":23} +{"Name":"Eric Reed","Team":"FLA","Position":"Outfielder","Height(inches)":71,"Weight(lbs)":170,"Age":26} +{"Name":"Reggi Abercrombie","Team":"FLA","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":224,"Age":26} +{"Name":"Ricky Nolasco","Team":"FLA","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":220,"Age":24} +{"Name":"Anibal Sanchez","Team":"FLA","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":180,"Age":23} +{"Name":"Scott Olsen","Team":"FLA","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":198,"Age":23} +{"Name":"Josh Johnson","Team":"FLA","Position":"Starting Pitcher","Height(inches)":79,"Weight(lbs)":240,"Age":23} +{"Name":"Dontrelle Willis","Team":"FLA","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":239,"Age":25} +{"Name":"Logan 
Kensing","Team":"FLA","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":185,"Age":24} +{"Name":"Sergio Mitre","Team":"FLA","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":210,"Age":26} +{"Name":"Kevin Gregg","Team":"FLA","Position":"Relief Pitcher","Height(inches)":78,"Weight(lbs)":220,"Age":28} +{"Name":"Travis Bowyer","Team":"FLA","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":200,"Age":25} +{"Name":"Renyel Pinto","Team":"FLA","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":195,"Age":24} +{"Name":"Randy Messenger","Team":"FLA","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":220,"Age":25} +{"Name":"Yusmeiro Petit","Team":"FLA","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":230,"Age":22} +{"Name":"Carlos Martinez","Team":"FLA","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":170,"Age":24} +{"Name":"Taylor Tankersley","Team":"FLA","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":220,"Age":23} +{"Name":"Henry Owens","Team":"FLA","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":230,"Age":27} +{"Name":"Jose Garcia","Team":"FLA","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":165,"Age":22} +{"Name":"Matt Lindstrom","Team":"FLA","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":205,"Age":27} +{"Name":"Javier Valentin","Team":"CIN","Position":"Catcher","Height(inches)":70,"Weight(lbs)":192,"Age":31} +{"Name":"Chad Moeller","Team":"CIN","Position":"Catcher","Height(inches)":75,"Weight(lbs)":210,"Age":32} +{"Name":"David Ross","Team":"CIN","Position":"Catcher","Height(inches)":74,"Weight(lbs)":205,"Age":29} +{"Name":"Joey Votto","Team":"CIN","Position":"First Baseman","Height(inches)":75,"Weight(lbs)":200,"Age":23} +{"Name":"Scott Hatteberg","Team":"CIN","Position":"First Baseman","Height(inches)":73,"Weight(lbs)":210,"Age":37} +{"Name":"Brandon Phillips","Team":"CIN","Position":"Second 
Baseman","Height(inches)":71,"Weight(lbs)":185,"Age":25} +{"Name":"Juan Castro","Team":"CIN","Position":"Shortstop","Height(inches)":71,"Weight(lbs)":195,"Age":34} +{"Name":"Alex Gonzalez","Team":"CIN","Position":"Shortstop","Height(inches)":72,"Weight(lbs)":202,"Age":30} +{"Name":"Mark Bellhorn","Team":"CIN","Position":"Third Baseman","Height(inches)":73,"Weight(lbs)":205,"Age":32} +{"Name":"Edwin Encarnacion","Team":"CIN","Position":"Third Baseman","Height(inches)":73,"Weight(lbs)":195,"Age":24} +{"Name":"Jeff Keppinger","Team":"CIN","Position":"Third Baseman","Height(inches)":72,"Weight(lbs)":180,"Age":26} +{"Name":"Norris Hopper","Team":"CIN","Position":"Outfielder","Height(inches)":69,"Weight(lbs)":200,"Age":27} +{"Name":"Chris Denorfia","Team":"CIN","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":185,"Age":26} +{"Name":"Adam Dunn","Team":"CIN","Position":"Outfielder","Height(inches)":78,"Weight(lbs)":240,"Age":27} +{"Name":"Bubba Crosby","Team":"CIN","Position":"Outfielder","Height(inches)":71,"Weight(lbs)":185,"Age":30} +{"Name":"Jeff Conine","Team":"CIN","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":220,"Age":40} +{"Name":"Ken Griffey Jr.","Team":"CIN","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":205,"Age":37} +{"Name":"Josh Hamilton","Team":"CIN","Position":"Outfielder","Height(inches)":76,"Weight(lbs)":205,"Age":25} +{"Name":"Ryan Freel","Team":"CIN","Position":"Outfielder","Height(inches)":70,"Weight(lbs)":180,"Age":30} +{"Name":"Kyle Lohse","Team":"CIN","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":201,"Age":28} +{"Name":"Bronson Arroyo","Team":"CIN","Position":"Starting Pitcher","Height(inches)":77,"Weight(lbs)":190,"Age":30} +{"Name":"Eric Milton","Team":"CIN","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":208,"Age":31} +{"Name":"Aaron Harang","Team":"CIN","Position":"Starting Pitcher","Height(inches)":79,"Weight(lbs)":240,"Age":28} +{"Name":"Kirk 
Saarloos","Team":"CIN","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":190,"Age":27} +{"Name":"Elizardo Ramirez","Team":"CIN","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":180,"Age":24} +{"Name":"Todd Coffey","Team":"CIN","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":230,"Age":26} +{"Name":"Brian Shackelford","Team":"CIN","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":195,"Age":30} +{"Name":"Bill Bray","Team":"CIN","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":215,"Age":23} +{"Name":"Bobby Livingston","Team":"CIN","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":190,"Age":24} +{"Name":"Matt Belisle","Team":"CIN","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":195,"Age":26} +{"Name":"Gary Majewski","Team":"CIN","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":215,"Age":27} +{"Name":"Mike Stanton","Team":"CIN","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":215,"Age":39} +{"Name":"Brad Salmon","Team":"CIN","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":220,"Age":27} +{"Name":"Jared Burton","Team":"CIN","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":220,"Age":25} +{"Name":"David Weathers","Team":"CIN","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":230,"Age":37} +{"Name":"Rheal Cormier","Team":"CIN","Position":"Relief Pitcher","Height(inches)":70,"Weight(lbs)":195,"Age":39} +{"Name":"Yorvit Torrealba","Team":"COL","Position":"Catcher","Height(inches)":71,"Weight(lbs)":190,"Age":28} +{"Name":"Chris Iannetta","Team":"COL","Position":"Catcher","Height(inches)":71,"Weight(lbs)":195,"Age":23} +{"Name":"Alvin Colina","Team":"COL","Position":"Catcher","Height(inches)":75,"Weight(lbs)":209,"Age":25} +{"Name":"Todd Helton","Team":"COL","Position":"First Baseman","Height(inches)":74,"Weight(lbs)":204,"Age":33} +{"Name":"Jamey Carroll","Team":"COL","Position":"Second 
Baseman","Height(inches)":69,"Weight(lbs)":170,"Age":33} +{"Name":"Kaz Matsui","Team":"COL","Position":"Second Baseman","Height(inches)":70,"Weight(lbs)":185,"Age":31} +{"Name":"Troy Tulowitzki","Team":"COL","Position":"Shortstop","Height(inches)":75,"Weight(lbs)":205,"Age":22} +{"Name":"Clint Barmes","Team":"COL","Position":"Shortstop","Height(inches)":72,"Weight(lbs)":175,"Age":27} +{"Name":"Garrett Atkins","Team":"COL","Position":"Third Baseman","Height(inches)":75,"Weight(lbs)":210,"Age":27} +{"Name":"Ryan Spilborghs","Team":"COL","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":190,"Age":27} +{"Name":"Cory Sullivan","Team":"COL","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":180,"Age":27} +{"Name":"Jeff Salazar","Team":"COL","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":180,"Age":26} +{"Name":"Willy Taveras","Team":"COL","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":160,"Age":25} +{"Name":"Matt Holliday","Team":"COL","Position":"Outfielder","Height(inches)":76,"Weight(lbs)":235,"Age":27} +{"Name":"Brad Hawpe","Team":"COL","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":200,"Age":27} +{"Name":"Jeff Baker","Team":"COL","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":210,"Age":25} +{"Name":"Javy Lopez","Team":"COL","Position":"Designated Hitter","Height(inches)":75,"Weight(lbs)":224,"Age":36} +{"Name":"Byung-Hyun Kim","Team":"COL","Position":"Starting Pitcher","Height(inches)":69,"Weight(lbs)":180,"Age":28} +{"Name":"Rodrigo Lopez","Team":"COL","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":190,"Age":31} +{"Name":"Brian Lawrence","Team":"COL","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":197,"Age":30} +{"Name":"Josh Fogg","Team":"COL","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":203,"Age":30} +{"Name":"Aaron Cook","Team":"COL","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":205,"Age":28} +{"Name":"Denny 
Bautista","Team":"COL","Position":"Starting Pitcher","Height(inches)":77,"Weight(lbs)":170,"Age":26} +{"Name":"Ubaldo Jimenez","Team":"COL","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":200,"Age":23} +{"Name":"Jason Hirsh","Team":"COL","Position":"Starting Pitcher","Height(inches)":80,"Weight(lbs)":250,"Age":25} +{"Name":"Jeff Francis","Team":"COL","Position":"Starting Pitcher","Height(inches)":77,"Weight(lbs)":200,"Age":26} +{"Name":"Taylor Buchholz","Team":"COL","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":220,"Age":25} +{"Name":"Ryan Speier","Team":"COL","Position":"Relief Pitcher","Height(inches)":79,"Weight(lbs)":200,"Age":27} +{"Name":"Ramon Ramirez","Team":"COL","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":190,"Age":25} +{"Name":"Manny Corpas","Team":"COL","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":170,"Age":24} +{"Name":"Juan Morillo","Team":"COL","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":190,"Age":23} +{"Name":"Brian Fuentes","Team":"COL","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":220,"Age":31} +{"Name":"LaTroy Hawkins","Team":"COL","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":215,"Age":34} +{"Name":"Tom Martin","Team":"COL","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":206,"Age":36} +{"Name":"Jeremy Affeldt","Team":"COL","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":215,"Age":27} +{"Name":"Paul Lo Duca","Team":"NYM","Position":"Catcher","Height(inches)":70,"Weight(lbs)":185,"Age":34} +{"Name":"Ramon Castro","Team":"NYM","Position":"Catcher","Height(inches)":75,"Weight(lbs)":235,"Age":31} +{"Name":"Julio Franco","Team":"NYM","Position":"First Baseman","Height(inches)":73,"Weight(lbs)":188,"Age":48} +{"Name":"Carlos Delgado","Team":"NYM","Position":"First Baseman","Height(inches)":75,"Weight(lbs)":230,"Age":34} +{"Name":"Jose Valentin","Team":"NYM","Position":"Second 
Baseman","Height(inches)":70,"Weight(lbs)":195,"Age":37} +{"Name":"Anderson Hernandez","Team":"NYM","Position":"Second Baseman","Height(inches)":69,"Weight(lbs)":168,"Age":24} +{"Name":"Damion Easley","Team":"NYM","Position":"Shortstop","Height(inches)":71,"Weight(lbs)":190,"Age":37} +{"Name":"Jose Reyes","Team":"NYM","Position":"Shortstop","Height(inches)":72,"Weight(lbs)":160,"Age":23} +{"Name":"David Wright","Team":"NYM","Position":"Third Baseman","Height(inches)":72,"Weight(lbs)":200,"Age":24} +{"Name":"Ben Johnson","Team":"NYM","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":200,"Age":25} +{"Name":"Endy Chavez","Team":"NYM","Position":"Outfielder","Height(inches)":70,"Weight(lbs)":189,"Age":29} +{"Name":"David Newhan","Team":"NYM","Position":"Outfielder","Height(inches)":70,"Weight(lbs)":180,"Age":33} +{"Name":"Carlos Beltran","Team":"NYM","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":190,"Age":29} +{"Name":"Shawn Green","Team":"NYM","Position":"Outfielder","Height(inches)":76,"Weight(lbs)":200,"Age":34} +{"Name":"Moises Alou","Team":"NYM","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":220,"Age":40} +{"Name":"Lastings Milledge","Team":"NYM","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":187,"Age":21} +{"Name":"Alay Soler","Team":"NYM","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":240,"Age":27} +{"Name":"Mike Pelfrey","Team":"NYM","Position":"Starting Pitcher","Height(inches)":79,"Weight(lbs)":190,"Age":23} +{"Name":"Pedro Martinez","Team":"NYM","Position":"Starting Pitcher","Height(inches)":71,"Weight(lbs)":180,"Age":35} +{"Name":"Tom Glavine","Team":"NYM","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":185,"Age":40} +{"Name":"Chan Ho Park","Team":"NYM","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":210,"Age":33} +{"Name":"Orlando Hernandez","Team":"NYM","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":220,"Age":37} +{"Name":"Dave 
Williams","Team":"NYM","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":219,"Age":27} +{"Name":"Oliver Perez","Team":"NYM","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":190,"Age":25} +{"Name":"John Maine","Team":"NYM","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":193,"Age":25} +{"Name":"Marcos Carvajal","Team":"NYM","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":175,"Age":22} +{"Name":"Ambiorix Burgos","Team":"NYM","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":180,"Age":22} +{"Name":"Jason Vargas","Team":"NYM","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":215,"Age":24} +{"Name":"Jon Adkins","Team":"NYM","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":210,"Age":29} +{"Name":"Juan Padilla","Team":"NYM","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":200,"Age":30} +{"Name":"Duaner Sanchez","Team":"NYM","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":190,"Age":27} +{"Name":"Pedro Feliciano","Team":"NYM","Position":"Relief Pitcher","Height(inches)":70,"Weight(lbs)":185,"Age":30} +{"Name":"Aaron Heilman","Team":"NYM","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":220,"Age":28} +{"Name":"Jorge Sosa","Team":"NYM","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":170,"Age":29} +{"Name":"Scott Schoeneweis","Team":"NYM","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":195,"Age":33} +{"Name":"Guillermo Mota","Team":"NYM","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":205,"Age":33} +{"Name":"Billy Wagner","Team":"NYM","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":195,"Age":35} +{"Name":"Philip Humber","Team":"NYM","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":210,"Age":24} +{"Name":"Brad Ausmus","Team":"HOU","Position":"Catcher","Height(inches)":71,"Weight(lbs)":190,"Age":37} +{"Name":"Humberto 
Quintero","Team":"HOU","Position":"Catcher","Height(inches)":73,"Weight(lbs)":190,"Age":27} +{"Name":"Hector Gimenez","Team":"HOU","Position":"Catcher","Height(inches)":70,"Weight(lbs)":180,"Age":24} +{"Name":"Lance Berkman","Team":"HOU","Position":"First Baseman","Height(inches)":73,"Weight(lbs)":220,"Age":31} +{"Name":"Mike Lamb","Team":"HOU","Position":"First Baseman","Height(inches)":73,"Weight(lbs)":190,"Age":31} +{"Name":"Mark Loretta","Team":"HOU","Position":"Second Baseman","Height(inches)":72,"Weight(lbs)":186,"Age":35} +{"Name":"Craig Biggio","Team":"HOU","Position":"Second Baseman","Height(inches)":71,"Weight(lbs)":185,"Age":41} +{"Name":"Brooks Conrad","Team":"HOU","Position":"Second Baseman","Height(inches)":71,"Weight(lbs)":190,"Age":27} +{"Name":"Chris Burke","Team":"HOU","Position":"Second Baseman","Height(inches)":71,"Weight(lbs)":180,"Age":26} +{"Name":"Eric Bruntlett","Team":"HOU","Position":"Second Baseman","Height(inches)":72,"Weight(lbs)":190,"Age":28} +{"Name":"Adam Everett","Team":"HOU","Position":"Shortstop","Height(inches)":72,"Weight(lbs)":170,"Age":30} +{"Name":"Morgan Ensberg","Team":"HOU","Position":"Third Baseman","Height(inches)":74,"Weight(lbs)":210,"Age":31} +{"Name":"Carlos Lee","Team":"HOU","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":240,"Age":30} +{"Name":"Jason Lane","Team":"HOU","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":220,"Age":30} +{"Name":"Orlando Palmeiro","Team":"HOU","Position":"Outfielder","Height(inches)":71,"Weight(lbs)":180,"Age":38} +{"Name":"Luke Scott","Team":"HOU","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":210,"Age":28} +{"Name":"Charlton Jimerson","Team":"HOU","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":210,"Age":27} +{"Name":"Fernando Nieve","Team":"HOU","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":195,"Age":24} +{"Name":"Wandy Rodriguez","Team":"HOU","Position":"Starting Pitcher","Height(inches)":71,"Weight(lbs)":160,"Age":28} 
+{"Name":"Brandon Backe","Team":"HOU","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":180,"Age":28} +{"Name":"Matt Albers","Team":"HOU","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":205,"Age":24} +{"Name":"Woody Williams","Team":"HOU","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":200,"Age":40} +{"Name":"Roy Oswalt","Team":"HOU","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":185,"Age":29} +{"Name":"Jason Jennings","Team":"HOU","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":245,"Age":28} +{"Name":"Miguel Asencio","Team":"HOU","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":190,"Age":26} +{"Name":"Brad Lidge","Team":"HOU","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":210,"Age":30} +{"Name":"Trever Miller","Team":"HOU","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":200,"Age":33} +{"Name":"David Borkowski","Team":"HOU","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":200,"Age":30} +{"Name":"Dan Wheeler","Team":"HOU","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":222,"Age":29} +{"Name":"Paul Estrada","Team":"HOU","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":215,"Age":24} +{"Name":"Lincoln Holdzkom","Team":"HOU","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":240,"Age":24} +{"Name":"Chris Sampson","Team":"HOU","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":170,"Age":28} +{"Name":"Chad Qualls","Team":"HOU","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":220,"Age":28} +{"Name":"Ezequiel Astacio","Team":"HOU","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":156,"Age":27} +{"Name":"Mike Lieberthal","Team":"LA","Position":"Catcher","Height(inches)":72,"Weight(lbs)":190,"Age":35} +{"Name":"Russell Martin","Team":"LA","Position":"Catcher","Height(inches)":71,"Weight(lbs)":202,"Age":24} +{"Name":"Olmedo Saenz","Team":"LA","Position":"First 
Baseman","Height(inches)":71,"Weight(lbs)":221,"Age":36} +{"Name":"James Loney","Team":"LA","Position":"First Baseman","Height(inches)":75,"Weight(lbs)":200,"Age":22} +{"Name":"Nomar Garciaparra","Team":"LA","Position":"First Baseman","Height(inches)":72,"Weight(lbs)":190,"Age":33} +{"Name":"Jeff Kent","Team":"LA","Position":"Second Baseman","Height(inches)":73,"Weight(lbs)":210,"Age":38} +{"Name":"Ramon Martinez","Team":"LA","Position":"Second Baseman","Height(inches)":73,"Weight(lbs)":190,"Age":34} +{"Name":"Marlon Anderson","Team":"LA","Position":"Second Baseman","Height(inches)":71,"Weight(lbs)":200,"Age":33} +{"Name":"Rafael Furcal","Team":"LA","Position":"Shortstop","Height(inches)":70,"Weight(lbs)":165,"Age":29} +{"Name":"Wilson Betemit","Team":"LA","Position":"Third Baseman","Height(inches)":75,"Weight(lbs)":190,"Age":26} +{"Name":"Andy LaRoche","Team":"LA","Position":"Third Baseman","Height(inches)":71,"Weight(lbs)":185,"Age":23} +{"Name":"Matt Kemp","Team":"LA","Position":"Outfielder","Height(inches)":76,"Weight(lbs)":230,"Age":22} +{"Name":"Andre Ethier","Team":"LA","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":208,"Age":24} +{"Name":"Delwyn Young","Team":"LA","Position":"Outfielder","Height(inches)":68,"Weight(lbs)":209,"Age":24} +{"Name":"Jason Repko","Team":"LA","Position":"Outfielder","Height(inches)":71,"Weight(lbs)":175,"Age":26} +{"Name":"Juan Pierre","Team":"LA","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":180,"Age":29} +{"Name":"Luis Gonzalez","Team":"LA","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":200,"Age":39} +{"Name":"Jason Schmidt","Team":"LA","Position":"Starting Pitcher","Height(inches)":77,"Weight(lbs)":205,"Age":34} +{"Name":"Randy Wolf","Team":"LA","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":200,"Age":30} +{"Name":"Brad Penny","Team":"LA","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":250,"Age":28} +{"Name":"Derek Lowe","Team":"LA","Position":"Starting 
Pitcher","Height(inches)":78,"Weight(lbs)":210,"Age":33} +{"Name":"Mark Hendrickson","Team":"LA","Position":"Starting Pitcher","Height(inches)":81,"Weight(lbs)":230,"Age":32} +{"Name":"Chad Billingsley","Team":"LA","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":244,"Age":22} +{"Name":"Takashi Saito","Team":"LA","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":202,"Age":37} +{"Name":"Jonathan Broxton","Team":"LA","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":240,"Age":22} +{"Name":"Hong-Chih Kuo","Team":"LA","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":200,"Age":25} +{"Name":"Eric Stults","Team":"LA","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":215,"Age":27} +{"Name":"Chin-Hui Tsao","Team":"LA","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":177,"Age":25} +{"Name":"Tim Hamulack","Team":"LA","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":210,"Age":30} +{"Name":"Yhency Brazoban","Team":"LA","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":170,"Age":26} +{"Name":"Brett Tomko","Team":"LA","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":215,"Age":33} +{"Name":"Joe Beimel","Team":"LA","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":217,"Age":29} +{"Name":"Elmer Dessens","Team":"LA","Position":"Relief Pitcher","Height(inches)":70,"Weight(lbs)":198,"Age":36} +{"Name":"Ryan Budde","Team":"PHI","Position":"Catcher","Height(inches)":71,"Weight(lbs)":200,"Age":27} +{"Name":"Rod Barajas","Team":"PHI","Position":"Catcher","Height(inches)":74,"Weight(lbs)":220,"Age":31} +{"Name":"Carlos Ruiz","Team":"PHI","Position":"Catcher","Height(inches)":72,"Weight(lbs)":170,"Age":28} +{"Name":"Chris Coste","Team":"PHI","Position":"Catcher","Height(inches)":73,"Weight(lbs)":200,"Age":34} +{"Name":"Ryan Howard","Team":"PHI","Position":"First Baseman","Height(inches)":76,"Weight(lbs)":230,"Age":27} +{"Name":"Wes 
Helms","Team":"PHI","Position":"First Baseman","Height(inches)":76,"Weight(lbs)":231,"Age":30} +{"Name":"Chase Utley","Team":"PHI","Position":"Second Baseman","Height(inches)":73,"Weight(lbs)":183,"Age":28} +{"Name":"Danny Sandoval","Team":"PHI","Position":"Second Baseman","Height(inches)":71,"Weight(lbs)":192,"Age":27} +{"Name":"Jimmy Rollins","Team":"PHI","Position":"Shortstop","Height(inches)":68,"Weight(lbs)":167,"Age":28} +{"Name":"Abraham Nu?ez","Team":"PHI","Position":"Third Baseman","Height(inches)":71,"Weight(lbs)":190,"Age":30} +{"Name":"Michael Bourn","Team":"PHI","Position":"Outfielder","Height(inches)":71,"Weight(lbs)":180,"Age":24} +{"Name":"Chris Roberson","Team":"PHI","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":180,"Age":27} +{"Name":"Jayson Werth","Team":"PHI","Position":"Outfielder","Height(inches)":77,"Weight(lbs)":215,"Age":27} +{"Name":"Shane Victorino","Team":"PHI","Position":"Outfielder","Height(inches)":69,"Weight(lbs)":160,"Age":26} +{"Name":"Aaron Rowand","Team":"PHI","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":205,"Age":29} +{"Name":"Pat Burrell","Team":"PHI","Position":"Outfielder","Height(inches)":76,"Weight(lbs)":223,"Age":30} +{"Name":"Greg Dobbs","Team":"PHI","Position":"Designated Hitter","Height(inches)":73,"Weight(lbs)":205,"Age":28} +{"Name":"Cole Hamels","Team":"PHI","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":175,"Age":23} +{"Name":"Alfredo Simon","Team":"PHI","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":170,"Age":25} +{"Name":"Scott Mathieson","Team":"PHI","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":190,"Age":23} +{"Name":"Freddy Garcia","Team":"PHI","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":240,"Age":31} +{"Name":"Jamie Moyer","Team":"PHI","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":175,"Age":44} +{"Name":"Jon Lieber","Team":"PHI","Position":"Starting 
Pitcher","Height(inches)":74,"Weight(lbs)":230,"Age":36} +{"Name":"Brett Myers","Team":"PHI","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":223,"Age":26} +{"Name":"Adam Eaton","Team":"PHI","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":196,"Age":29} +{"Name":"Geoff Geary","Team":"PHI","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":167,"Age":30} +{"Name":"Clay Condrey","Team":"PHI","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":195,"Age":31} +{"Name":"Ryan Madson","Team":"PHI","Position":"Relief Pitcher","Height(inches)":78,"Weight(lbs)":190,"Age":26} +{"Name":"Antonio Alfonseca","Team":"PHI","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":250,"Age":34} +{"Name":"Tom Gordon","Team":"PHI","Position":"Relief Pitcher","Height(inches)":70,"Weight(lbs)":190,"Age":39} +{"Name":"Brian Sanches","Team":"PHI","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":190,"Age":28} +{"Name":"Jim Ed Warden","Team":"PHI","Position":"Relief Pitcher","Height(inches)":79,"Weight(lbs)":190,"Age":27} +{"Name":"Anderson Garcia","Team":"PHI","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":170,"Age":25} +{"Name":"Eude Brito","Team":"PHI","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":160,"Age":28} +{"Name":"Fabio Castro","Team":"PHI","Position":"Relief Pitcher","Height(inches)":68,"Weight(lbs)":150,"Age":22} +{"Name":"Matt Smith","Team":"PHI","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":225,"Age":27} +{"Name":"Damian Miller","Team":"MLW","Position":"Catcher","Height(inches)":75,"Weight(lbs)":220,"Age":37} +{"Name":"Johnny Estrada","Team":"MLW","Position":"Catcher","Height(inches)":71,"Weight(lbs)":209,"Age":30} +{"Name":"Mike Rivera","Team":"MLW","Position":"Catcher","Height(inches)":72,"Weight(lbs)":210,"Age":30} +{"Name":"J.D. 
Closser","Team":"MLW","Position":"Catcher","Height(inches)":70,"Weight(lbs)":176,"Age":27} +{"Name":"Prince Fielder","Team":"MLW","Position":"First Baseman","Height(inches)":72,"Weight(lbs)":260,"Age":22} +{"Name":"Rickie Weeks","Team":"MLW","Position":"Second Baseman","Height(inches)":72,"Weight(lbs)":195,"Age":24} +{"Name":"Tony Graffanino","Team":"MLW","Position":"Second Baseman","Height(inches)":73,"Weight(lbs)":190,"Age":34} +{"Name":"Craig Counsell","Team":"MLW","Position":"Shortstop","Height(inches)":72,"Weight(lbs)":184,"Age":36} +{"Name":"J.J. Hardy","Team":"MLW","Position":"Shortstop","Height(inches)":74,"Weight(lbs)":180,"Age":24} +{"Name":"Bill Hall","Team":"MLW","Position":"Shortstop","Height(inches)":72,"Weight(lbs)":195,"Age":27} +{"Name":"Vinny Rottino","Team":"MLW","Position":"Third Baseman","Height(inches)":72,"Weight(lbs)":195,"Age":26} +{"Name":"Corey Koskie","Team":"MLW","Position":"Third Baseman","Height(inches)":75,"Weight(lbs)":219,"Age":33} +{"Name":"Kevin Mench","Team":"MLW","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":225,"Age":29} +{"Name":"Geoff Jenkins","Team":"MLW","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":212,"Age":32} +{"Name":"Brady Clark","Team":"MLW","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":202,"Age":33} +{"Name":"Tony Gwynn Jr.","Team":"MLW","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":185,"Age":24} +{"Name":"Corey Hart","Team":"MLW","Position":"Outfielder","Height(inches)":78,"Weight(lbs)":200,"Age":24} +{"Name":"Gabe Gross","Team":"MLW","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":209,"Age":27} +{"Name":"Laynce Nix","Team":"MLW","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":200,"Age":26} +{"Name":"Drew Anderson","Team":"MLW","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":195,"Age":25} +{"Name":"Claudio Vargas","Team":"MLW","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":228,"Age":28} +{"Name":"Chris 
Capuano","Team":"MLW","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":210,"Age":28} +{"Name":"Ben Hendrickson","Team":"MLW","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":190,"Age":26} +{"Name":"Dave Bush","Team":"MLW","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":212,"Age":27} +{"Name":"Carlos Villanueva","Team":"MLW","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":190,"Age":23} +{"Name":"Ben Sheets","Team":"MLW","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":218,"Age":28} +{"Name":"Jeff Suppan","Team":"MLW","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":220,"Age":32} +{"Name":"Brian Shouse","Team":"MLW","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":190,"Age":38} +{"Name":"Francisco Cordero","Team":"MLW","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":235,"Age":31} +{"Name":"Derrick Turnbow","Team":"MLW","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":210,"Age":29} +{"Name":"Matt Wise","Team":"MLW","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":200,"Age":31} +{"Name":"Grant Balfour","Team":"MLW","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":188,"Age":29} +{"Name":"Dennis Sarfate","Team":"MLW","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":210,"Age":25} +{"Name":"Jose Capellan","Team":"MLW","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":235,"Age":26} +{"Name":"Greg Aquino","Team":"MLW","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":188,"Age":29} +{"Name":"Josh Bard","Team":"SD","Position":"Catcher","Height(inches)":75,"Weight(lbs)":215,"Age":28} +{"Name":"Rob Bowen","Team":"SD","Position":"Catcher","Height(inches)":75,"Weight(lbs)":216,"Age":26} +{"Name":"Adrian Gonzalez","Team":"SD","Position":"First Baseman","Height(inches)":74,"Weight(lbs)":220,"Age":24} +{"Name":"Marcus Giles","Team":"SD","Position":"Second 
Baseman","Height(inches)":68,"Weight(lbs)":180,"Age":28} +{"Name":"Todd Walker","Team":"SD","Position":"Second Baseman","Height(inches)":72,"Weight(lbs)":185,"Age":33} +{"Name":"Geoff Blum","Team":"SD","Position":"Shortstop","Height(inches)":75,"Weight(lbs)":200,"Age":33} +{"Name":"Khalil Greene","Team":"SD","Position":"Shortstop","Height(inches)":71,"Weight(lbs)":210,"Age":27} +{"Name":"Paul McAnulty","Team":"SD","Position":"Outfielder","Height(inches)":70,"Weight(lbs)":220,"Age":26} +{"Name":"Terrmel Sledge","Team":"SD","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":185,"Age":29} +{"Name":"Jack Cust","Team":"SD","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":231,"Age":28} +{"Name":"Jose Cruz Jr.","Team":"SD","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":210,"Age":32} +{"Name":"Russell Branyan","Team":"SD","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":195,"Age":31} +{"Name":"Mike Cameron","Team":"SD","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":200,"Age":34} +{"Name":"Brian Giles","Team":"SD","Position":"Outfielder","Height(inches)":70,"Weight(lbs)":205,"Age":36} +{"Name":"Kevin Kouzmanoff","Team":"SD","Position":"Designated Hitter","Height(inches)":73,"Weight(lbs)":200,"Age":25} +{"Name":"Mike Thompson","Team":"SD","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":200,"Age":26} +{"Name":"Clay Hensley","Team":"SD","Position":"Starting Pitcher","Height(inches)":71,"Weight(lbs)":190,"Age":27} +{"Name":"Chris Young","Team":"SD","Position":"Starting Pitcher","Height(inches)":82,"Weight(lbs)":250,"Age":27} +{"Name":"Greg Maddux","Team":"SD","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":185,"Age":40} +{"Name":"Jake Peavy","Team":"SD","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":180,"Age":25} +{"Name":"Scott Cassidy","Team":"SD","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":170,"Age":31} +{"Name":"Scott Strickland","Team":"SD","Position":"Relief 
Pitcher","Height(inches)":71,"Weight(lbs)":180,"Age":30} +{"Name":"Scott Linebrink","Team":"SD","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":208,"Age":30} +{"Name":"Doug Brocail","Team":"SD","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":235,"Age":39} +{"Name":"Trevor Hoffman","Team":"SD","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":215,"Age":39} +{"Name":"Heath Bell","Team":"SD","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":244,"Age":29} +{"Name":"Royce Ring","Team":"SD","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":220,"Age":26} +{"Name":"Cla Meredith","Team":"SD","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":185,"Age":23} +{"Name":"Andrew Brown","Team":"SD","Position":"Relief Pitcher","Height(inches)":78,"Weight(lbs)":230,"Age":26} +{"Name":"Mike Adams","Team":"SD","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":190,"Age":28} +{"Name":"Justin Hampson","Team":"SD","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":200,"Age":26} +{"Name":"Kevin Cameron","Team":"SD","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":180,"Age":27} +{"Name":"Ryan Ketchner","Team":"SD","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":190,"Age":24} +{"Name":"Brian Schneider","Team":"WAS","Position":"Catcher","Height(inches)":73,"Weight(lbs)":196,"Age":30} +{"Name":"Jesus Flores","Team":"WAS","Position":"Catcher","Height(inches)":73,"Weight(lbs)":180,"Age":22} +{"Name":"Larry Broadway","Team":"WAS","Position":"First Baseman","Height(inches)":76,"Weight(lbs)":230,"Age":26} +{"Name":"Nick Johnson","Team":"WAS","Position":"First Baseman","Height(inches)":75,"Weight(lbs)":224,"Age":28} +{"Name":"Bernie Castro","Team":"WAS","Position":"Second Baseman","Height(inches)":70,"Weight(lbs)":160,"Age":27} +{"Name":"Josh Wilson","Team":"WAS","Position":"Shortstop","Height(inches)":73,"Weight(lbs)":178,"Age":25} +{"Name":"Cristian 
Guzman","Team":"WAS","Position":"Shortstop","Height(inches)":72,"Weight(lbs)":205,"Age":28} +{"Name":"Felipe Lopez","Team":"WAS","Position":"Shortstop","Height(inches)":73,"Weight(lbs)":185,"Age":26} +{"Name":"Ryan Zimmerman","Team":"WAS","Position":"Third Baseman","Height(inches)":75,"Weight(lbs)":210,"Age":22} +{"Name":"Nook Logan","Team":"WAS","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":180,"Age":27} +{"Name":"Ryan Church","Team":"WAS","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":190,"Age":28} +{"Name":"Kory Casto","Team":"WAS","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":200,"Age":25} +{"Name":"Mike Restovich","Team":"WAS","Position":"Outfielder","Height(inches)":76,"Weight(lbs)":257,"Age":28} +{"Name":"Alex Escobar","Team":"WAS","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":190,"Age":28} +{"Name":"Austin Kearns","Team":"WAS","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":220,"Age":26} +{"Name":"Chris Snelling","Team":"WAS","Position":"Outfielder","Height(inches)":70,"Weight(lbs)":165,"Age":25} +{"Name":"Billy Traber","Team":"WAS","Position":"Starting Pitcher","Height(inches)":77,"Weight(lbs)":205,"Age":27} +{"Name":"Tim Redding","Team":"WAS","Position":"Starting Pitcher","Height(inches)":72,"Weight(lbs)":200,"Age":29} +{"Name":"John Patterson","Team":"WAS","Position":"Starting Pitcher","Height(inches)":77,"Weight(lbs)":208,"Age":29} +{"Name":"Shawn Hill","Team":"WAS","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":185,"Age":25} +{"Name":"Joel Hanrahan","Team":"WAS","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":215,"Age":25} +{"Name":"Mike O'Connor","Team":"WAS","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":170,"Age":26} +{"Name":"Emiliano Fruto","Team":"WAS","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":235,"Age":22} +{"Name":"Chris Schroder","Team":"WAS","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":210,"Age":28} 
+{"Name":"Brett Campbell","Team":"WAS","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":170,"Age":25} +{"Name":"Beltran Perez","Team":"WAS","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":180,"Age":25} +{"Name":"Levale Speigner","Team":"WAS","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":170,"Age":26} +{"Name":"Jason Bergmann","Team":"WAS","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":190,"Age":25} +{"Name":"Saul Rivera","Team":"WAS","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":150,"Age":29} +{"Name":"Chris Booker","Team":"WAS","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":230,"Age":30} +{"Name":"Micah Bowie","Team":"WAS","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":203,"Age":32} +{"Name":"Jon Rauch","Team":"WAS","Position":"Relief Pitcher","Height(inches)":83,"Weight(lbs)":260,"Age":28} +{"Name":"Jerome Williams","Team":"WAS","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":246,"Age":25} +{"Name":"Luis Ayala","Team":"WAS","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":186,"Age":29} +{"Name":"Ryan Wagner","Team":"WAS","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":210,"Age":24} +{"Name":"Chad Cordero","Team":"WAS","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":198,"Age":24} +{"Name":"Humberto Cota","Team":"PIT","Position":"Catcher","Height(inches)":72,"Weight(lbs)":210,"Age":28} +{"Name":"Ronny Paulino","Team":"PIT","Position":"Catcher","Height(inches)":75,"Weight(lbs)":215,"Age":25} +{"Name":"Adam LaRoche","Team":"PIT","Position":"First Baseman","Height(inches)":75,"Weight(lbs)":180,"Age":27} +{"Name":"Ryan Doumit","Team":"PIT","Position":"First Baseman","Height(inches)":72,"Weight(lbs)":200,"Age":25} +{"Name":"Brad Eldred","Team":"PIT","Position":"First Baseman","Height(inches)":77,"Weight(lbs)":245,"Age":26} +{"Name":"Jose Castillo","Team":"PIT","Position":"Second 
Baseman","Height(inches)":73,"Weight(lbs)":200,"Age":25} +{"Name":"Jack Wilson","Team":"PIT","Position":"Shortstop","Height(inches)":72,"Weight(lbs)":192,"Age":29} +{"Name":"Freddy Sanchez","Team":"PIT","Position":"Third Baseman","Height(inches)":70,"Weight(lbs)":192,"Age":29} +{"Name":"Jason Bay","Team":"PIT","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":200,"Age":28} +{"Name":"Jose Bautista","Team":"PIT","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":192,"Age":26} +{"Name":"Xavier Nady","Team":"PIT","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":205,"Age":28} +{"Name":"Jody Gerut","Team":"PIT","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":190,"Age":29} +{"Name":"Nate McLouth","Team":"PIT","Position":"Outfielder","Height(inches)":71,"Weight(lbs)":186,"Age":25} +{"Name":"Chris Duffy","Team":"PIT","Position":"Outfielder","Height(inches)":70,"Weight(lbs)":170,"Age":26} +{"Name":"Rajai Davis","Team":"PIT","Position":"Outfielder","Height(inches)":71,"Weight(lbs)":197,"Age":26} +{"Name":"Shane Youman","Team":"PIT","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":219,"Age":27} +{"Name":"Yoslan Herrera","Team":"PIT","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":200,"Age":25} +{"Name":"Josh Shortslef","Team":"PIT","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":220,"Age":25} +{"Name":"Zach Duke","Team":"PIT","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":207,"Age":23} +{"Name":"Paul Maholm","Team":"PIT","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":225,"Age":24} +{"Name":"Tom Gorzelanny","Team":"PIT","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":207,"Age":24} +{"Name":"Shawn Chacon","Team":"PIT","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":212,"Age":29} +{"Name":"Tony Armas Jr.","Team":"PIT","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":225,"Age":28} +{"Name":"Ian 
Snell","Team":"PIT","Position":"Starting Pitcher","Height(inches)":71,"Weight(lbs)":170,"Age":25} +{"Name":"Sean Burnett","Team":"PIT","Position":"Starting Pitcher","Height(inches)":71,"Weight(lbs)":190,"Age":24} +{"Name":"John Grabow","Team":"PIT","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":210,"Age":28} +{"Name":"Marty McLeary","Team":"PIT","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":230,"Age":32} +{"Name":"Salomon Torres","Team":"PIT","Position":"Relief Pitcher","Height(inches)":71,"Weight(lbs)":210,"Age":34} +{"Name":"Damaso Marte","Team":"PIT","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":200,"Age":32} +{"Name":"Matt Capps","Team":"PIT","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":238,"Age":23} +{"Name":"Josh Sharpless","Team":"PIT","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":234,"Age":26} +{"Name":"Bryan Bullington","Team":"PIT","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":222,"Age":26} +{"Name":"Jonah Bayliss","Team":"PIT","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":200,"Age":26} +{"Name":"Brian Rogers","Team":"PIT","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":190,"Age":24} +{"Name":"Juan Perez","Team":"PIT","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":170,"Age":28} +{"Name":"Bengie Molina","Team":"SF","Position":"Catcher","Height(inches)":71,"Weight(lbs)":220,"Age":32} +{"Name":"Eliezer Alfonzo","Team":"SF","Position":"Catcher","Height(inches)":72,"Weight(lbs)":223,"Age":28} +{"Name":"Lance Niekro","Team":"SF","Position":"First Baseman","Height(inches)":75,"Weight(lbs)":210,"Age":28} +{"Name":"Mark Sweeney","Team":"SF","Position":"First Baseman","Height(inches)":73,"Weight(lbs)":215,"Age":37} +{"Name":"Ray Durham","Team":"SF","Position":"Second Baseman","Height(inches)":68,"Weight(lbs)":196,"Age":35} +{"Name":"Kevin Frandsen","Team":"SF","Position":"Second 
Baseman","Height(inches)":72,"Weight(lbs)":175,"Age":24} +{"Name":"Omar Vizquel","Team":"SF","Position":"Shortstop","Height(inches)":69,"Weight(lbs)":175,"Age":39} +{"Name":"Rich Aurilia","Team":"SF","Position":"Third Baseman","Height(inches)":73,"Weight(lbs)":189,"Age":35} +{"Name":"Pedro Feliz","Team":"SF","Position":"Third Baseman","Height(inches)":73,"Weight(lbs)":205,"Age":31} +{"Name":"Todd Linden","Team":"SF","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":210,"Age":26} +{"Name":"Dave Roberts","Team":"SF","Position":"Outfielder","Height(inches)":70,"Weight(lbs)":180,"Age":34} +{"Name":"Jason Ellison","Team":"SF","Position":"Outfielder","Height(inches)":70,"Weight(lbs)":180,"Age":28} +{"Name":"Randy Winn","Team":"SF","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":197,"Age":32} +{"Name":"Ryan Klesko","Team":"SF","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":220,"Age":35} +{"Name":"Barry Bonds","Team":"SF","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":228,"Age":42} +{"Name":"Fred Lewis","Team":"SF","Position":"Outfielder","Height(inches)":74,"Weight(lbs)":190,"Age":26} +{"Name":"Kelyn Acosta","Team":"SF","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":204,"Age":21} +{"Name":"Jonathan Sanchez","Team":"SF","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":165,"Age":24} +{"Name":"Matt Cain","Team":"SF","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":216,"Age":22} +{"Name":"Matt Morris","Team":"SF","Position":"Starting Pitcher","Height(inches)":77,"Weight(lbs)":220,"Age":32} +{"Name":"Russ Ortiz","Team":"SF","Position":"Starting Pitcher","Height(inches)":73,"Weight(lbs)":208,"Age":32} +{"Name":"Noah Lowry","Team":"SF","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":210,"Age":26} +{"Name":"Barry Zito","Team":"SF","Position":"Starting Pitcher","Height(inches)":76,"Weight(lbs)":215,"Age":28} +{"Name":"Vinnie Chulk","Team":"SF","Position":"Relief 
Pitcher","Height(inches)":74,"Weight(lbs)":195,"Age":28} +{"Name":"Kevin Correia","Team":"SF","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":200,"Age":26} +{"Name":"Steve Kline","Team":"SF","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":215,"Age":34} +{"Name":"Armando Benitez","Team":"SF","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":229,"Age":34} +{"Name":"Scott Munter","Team":"SF","Position":"Relief Pitcher","Height(inches)":78,"Weight(lbs)":240,"Age":26} +{"Name":"Jack Taschner","Team":"SF","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":207,"Age":28} +{"Name":"Brian Wilson","Team":"SF","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":205,"Age":24} +{"Name":"Merkin Valdez","Team":"SF","Position":"Relief Pitcher","Height(inches)":77,"Weight(lbs)":208,"Age":25} +{"Name":"Brad Hennessey","Team":"SF","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":185,"Age":27} +{"Name":"Billy Sadler","Team":"SF","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":190,"Age":25} +{"Name":"Pat Misch","Team":"SF","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":170,"Age":25} +{"Name":"Gary Bennett","Team":"STL","Position":"Catcher","Height(inches)":72,"Weight(lbs)":208,"Age":34} +{"Name":"Yadier Molina","Team":"STL","Position":"Catcher","Height(inches)":71,"Weight(lbs)":225,"Age":24} +{"Name":"John Nelson","Team":"STL","Position":"First Baseman","Height(inches)":73,"Weight(lbs)":190,"Age":27} +{"Name":"Albert Pujols","Team":"STL","Position":"First Baseman","Height(inches)":75,"Weight(lbs)":225,"Age":27} +{"Name":"Adam Kennedy","Team":"STL","Position":"Second Baseman","Height(inches)":73,"Weight(lbs)":185,"Age":31} +{"Name":"Aaron Miles","Team":"STL","Position":"Second Baseman","Height(inches)":67,"Weight(lbs)":180,"Age":30} +{"Name":"David Eckstein","Team":"STL","Position":"Shortstop","Height(inches)":67,"Weight(lbs)":165,"Age":32} +{"Name":"Scott 
Rolen","Team":"STL","Position":"Third Baseman","Height(inches)":76,"Weight(lbs)":240,"Age":31} +{"Name":"Scott Spiezio","Team":"STL","Position":"Third Baseman","Height(inches)":74,"Weight(lbs)":220,"Age":34} +{"Name":"Jim Edmonds","Team":"STL","Position":"Outfielder","Height(inches)":73,"Weight(lbs)":212,"Age":36} +{"Name":"So Taguchi","Team":"STL","Position":"Outfielder","Height(inches)":70,"Weight(lbs)":163,"Age":37} +{"Name":"Juan Encarnacion","Team":"STL","Position":"Outfielder","Height(inches)":75,"Weight(lbs)":215,"Age":30} +{"Name":"Skip Schumaker","Team":"STL","Position":"Outfielder","Height(inches)":70,"Weight(lbs)":175,"Age":27} +{"Name":"John Rodriguez","Team":"STL","Position":"Outfielder","Height(inches)":72,"Weight(lbs)":205,"Age":29} +{"Name":"Chris Duncan","Team":"STL","Position":"Outfielder","Height(inches)":77,"Weight(lbs)":210,"Age":25} +{"Name":"Adam Wainwright","Team":"STL","Position":"Starting Pitcher","Height(inches)":79,"Weight(lbs)":205,"Age":25} +{"Name":"Mark Mulder","Team":"STL","Position":"Starting Pitcher","Height(inches)":78,"Weight(lbs)":208,"Age":29} +{"Name":"Anthony Reyes","Team":"STL","Position":"Starting Pitcher","Height(inches)":74,"Weight(lbs)":215,"Age":25} +{"Name":"Ryan Franklin","Team":"STL","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":180,"Age":33} +{"Name":"Kip Wells","Team":"STL","Position":"Starting Pitcher","Height(inches)":75,"Weight(lbs)":200,"Age":29} +{"Name":"Chris Carpenter","Team":"STL","Position":"Starting Pitcher","Height(inches)":78,"Weight(lbs)":230,"Age":31} +{"Name":"Russ Springer","Team":"STL","Position":"Relief Pitcher","Height(inches)":76,"Weight(lbs)":211,"Age":38} +{"Name":"Jason Isringhausen","Team":"STL","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":230,"Age":34} +{"Name":"Ricardo Rincon","Team":"STL","Position":"Relief Pitcher","Height(inches)":69,"Weight(lbs)":190,"Age":36} +{"Name":"Braden Looper","Team":"STL","Position":"Relief 
Pitcher","Height(inches)":75,"Weight(lbs)":220,"Age":32} +{"Name":"Randy Flores","Team":"STL","Position":"Relief Pitcher","Height(inches)":72,"Weight(lbs)":180,"Age":31} +{"Name":"Josh Hancock","Team":"STL","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":205,"Age":28} +{"Name":"Brad Thompson","Team":"STL","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":190,"Age":25} +{"Name":"Tyler Johnson","Team":"STL","Position":"Relief Pitcher","Height(inches)":74,"Weight(lbs)":180,"Age":25} +{"Name":"Chris Narveson","Team":"STL","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":205,"Age":25} +{"Name":"Randy Keisler","Team":"STL","Position":"Relief Pitcher","Height(inches)":75,"Weight(lbs)":190,"Age":31} +{"Name":"Josh Kinney","Team":"STL","Position":"Relief Pitcher","Height(inches)":73,"Weight(lbs)":195,"Age":27} diff --git a/tests/common/storages/samples/met_csv/A801/A881_20230920.csv b/tests/common/storages/samples/met_csv/A801/A881_20230920.csv new file mode 100644 index 0000000000..c32710c41d --- /dev/null +++ b/tests/common/storages/samples/met_csv/A801/A881_20230920.csv @@ -0,0 +1,25 @@ +code,date,temperature +A881,2023-09-20 00:00:00,18.7 +A881,2023-09-20 01:00:00,18.8 +A881,2023-09-20 02:00:00,18.7 +A881,2023-09-20 03:00:00,18.8 +A881,2023-09-20 04:00:00,18.5 +A881,2023-09-20 05:00:00,17.9 +A881,2023-09-20 06:00:00,18.0 +A881,2023-09-20 07:00:00,17.9 +A881,2023-09-20 08:00:00,17.9 +A881,2023-09-20 09:00:00,18.1 +A881,2023-09-20 10:00:00,18.0 +A881,2023-09-20 11:00:00,18.9 +A881,2023-09-20 12:00:00,20.2 +A881,2023-09-20 13:00:00,21.6 +A881,2023-09-20 14:00:00,23.5 +A881,2023-09-20 15:00:00,24.8 +A881,2023-09-20 16:00:00,26.3 +A881,2023-09-20 17:00:00,26.4 +A881,2023-09-20 18:00:00,24.8 +A881,2023-09-20 19:00:00,24.4 +A881,2023-09-20 20:00:00,23.9 +A881,2023-09-20 21:00:00,22.2 +A881,2023-09-20 22:00:00,20.8 +A881,2023-09-20 23:00:00,20.3 diff --git a/tests/common/storages/samples/met_csv/A803/A803_20230919.csv 
b/tests/common/storages/samples/met_csv/A803/A803_20230919.csv new file mode 100644 index 0000000000..860057d06e --- /dev/null +++ b/tests/common/storages/samples/met_csv/A803/A803_20230919.csv @@ -0,0 +1,25 @@ +code,date,temperature +A803,2023-09-19 00:00:00,18.8 +A803,2023-09-19 01:00:00,18.6 +A803,2023-09-19 02:00:00,18.4 +A803,2023-09-19 03:00:00,19.1 +A803,2023-09-19 04:00:00,18.7 +A803,2023-09-19 05:00:00,18.2 +A803,2023-09-19 06:00:00,17.9 +A803,2023-09-19 07:00:00,17.7 +A803,2023-09-19 08:00:00,16.4 +A803,2023-09-19 09:00:00,16.9 +A803,2023-09-19 10:00:00,17.1 +A803,2023-09-19 11:00:00,17.6 +A803,2023-09-19 12:00:00,18.5 +A803,2023-09-19 13:00:00,19.0 +A803,2023-09-19 14:00:00,20.2 +A803,2023-09-19 15:00:00,21.9 +A803,2023-09-19 16:00:00,22.7 +A803,2023-09-19 17:00:00,23.0 +A803,2023-09-19 18:00:00,23.8 +A803,2023-09-19 19:00:00,24.0 +A803,2023-09-19 20:00:00,23.3 +A803,2023-09-19 21:00:00,22.9 +A803,2023-09-19 22:00:00,22.1 +A803,2023-09-19 23:00:00,21.8 diff --git a/tests/common/storages/samples/met_csv/A803/A803_20230920.csv b/tests/common/storages/samples/met_csv/A803/A803_20230920.csv new file mode 100644 index 0000000000..af417725b2 --- /dev/null +++ b/tests/common/storages/samples/met_csv/A803/A803_20230920.csv @@ -0,0 +1,25 @@ +code,date,temperature +A803,2023-09-20 00:00:00,21.7 +A803,2023-09-20 01:00:00,21.6 +A803,2023-09-20 02:00:00,21.2 +A803,2023-09-20 03:00:00,20.4 +A803,2023-09-20 04:00:00,20.0 +A803,2023-09-20 05:00:00,19.5 +A803,2023-09-20 06:00:00,19.5 +A803,2023-09-20 07:00:00,19.4 +A803,2023-09-20 08:00:00,19.2 +A803,2023-09-20 09:00:00,19.2 +A803,2023-09-20 10:00:00,19.3 +A803,2023-09-20 11:00:00,19.3 +A803,2023-09-20 12:00:00,19.9 +A803,2023-09-20 13:00:00,22.3 +A803,2023-09-20 14:00:00,25.0 +A803,2023-09-20 15:00:00,26.1 +A803,2023-09-20 16:00:00,27.1 +A803,2023-09-20 17:00:00,27.2 +A803,2023-09-20 18:00:00,27.5 +A803,2023-09-20 19:00:00,27.8 +A803,2023-09-20 20:00:00,27.1 +A803,2023-09-20 21:00:00,26.0 +A803,2023-09-20 22:00:00,25.0 
+A803,2023-09-20 23:00:00,24.1 diff --git a/tests/common/storages/samples/parquet/mlb_players.parquet b/tests/common/storages/samples/parquet/mlb_players.parquet new file mode 100644 index 0000000000000000000000000000000000000000..b32379657cb001896d78583e3256adcc9ad21aac GIT binary patch literal 21026 zcmch<30M?Y);C^--B{IBU#g3yY&$JhA&sD*fN0Q|gzBc*Hvs|3W_5Q#msEFQFF=z? z1O)}7MvW#WV-hhEmx;z?qS+I1ix_pxIwmto#E8*klF2^VCi$O6^G;?m^Z%aj`QG>C z@j!QN_ug~Q{hf1u=a!X1Rc5><{>+M~_)SmZrSWU&c#OfYzbwN)tet4m2_dIn3Uh^_#zviC zEnOXiR`AZ8VyUr_%a>KZx;5*@QdMe`d8|#dqg(}LvlB&ez$gdis*%r*NIBACpUD!W+nOWrV75DLPqNqj|)Fs*nRS1C*W zj<7G#EaO?08r2DtA}ZQX95%W{^-2CBM@|jQo<|O{oVYsQfO_F?4*nnaBOUCkPoZxd? z-^J&pm4?8KY9tt)U&ZrMDn+NFauppRpNJ2pws4iIJ0Q7zax!2G|}LhpSm_lf`E3Hk0h-YCNi!+?E8-4s#VSsR4dQHt5-}fNBdx zsR>leSPCUE=;10o{QFpo!>uV0MJ=7Ls9`tG!-xVfB-Ixb99ka6HnKpL0g zkwmT#OjGhX#n5@syNWMRdzv*$>8Bw3YuI zST(M+8d_Vs8oI=SJRQ&)Oe!mE<;x6%4f~)pO zf8gQ(74iYKT*93+V5QqXI2G&Y;jibIO5a;2fB zo@~csbg9}VDLrox07=6A6ae1!+BsbDw7h9t9@ry(XG)pq^8tJS^!Vk?uPh@#N!A7| zPg=R)8ezQ&>;<4g!aG2-E>JH_vDHJS(-r=p+#&Q~_Woxz)vlho8ebExay792>cnQL zkuTtDvEu$DlTCsN0$Znr%aghvb zZ3mfXvH@%=YMWq4O|j%jic9o+r6WH?4*T4q4=p2TALu%azqtA@prID2LkK4oPqR;h z3GzyQM_wAhngUSilVrcj^ZZj?_flf3=qEF+&lzEqun6(oo}F}_1cv9hM^h_1?lFOv z0xOfOZma~q)dI7xwzP<@k(MJK3M{+Xp%p_a6@*rNA{&xt1uHDRb{*;su{X6+>Xgz zleOD33c!_w0b@EOuj(K8N+wNZvdbe%%E;T68owIk%H{SxQ&DL|CrZfWlJVc|?4WE4 zVCNkf;Nn^Vl?%m2a06o8$P1J$48Brtu`4oo=~79rq&0(kf~j!&Rkay^60`I7!s2oE zU$B1OWwXFSuLf)LJHDl=#72?dgGr(B{O_o0MRl>ylNk3607%gfBo$JE9m4neE^y?qcwK79 zFVXCSI!vAQmX1GfvZ;QxEzoRDl^p1zZl^wV5nuV%X}VGtJ37d3r39=*IMJ8EB}6!tY8EEPYjfofMM zD9V~kJHZ6ifOGtGqC{?zAA#9A4b!mOYv^gAz~QtO6&fLMf z)Dc5~CVCvCi!K3g89ee=?RxOO!GI_Y$h=Vnv~HHAeQb)3vcZ6~uck7_I@#wAsLWkD z{u-kjY)foRjk30A6cPWOunC>gA-hf!kgOMYjw*xzMQZ_&ba`-uxye|3juBjePjWL` zbs3I8P;$aZOI1HFXs*Cv}=aDfPwF zlSHLo9W>+Ln{)MqKjdTQl57k~L)0A%vfC)E25I%6H_^*~%zUq-Yhl#}aPP>1q@^dsrtxSHbIgZ5FQ?;IilnSp1Uf$OTZj;%kPpMQym+V#{R8#s%(NC94a-%#r z&==3&N~@3}gcyGe-h)-~-|_`iP!^jQnx<-0UzmTv2tE>4@5q{jj|c()QSx5;3IZFS 
zU)6>oGHPl<^`x-^jOlu5U;}!Updu*;dLsstU{&G=5lJxn2%=Syv<7dIrpAMQhwwWY zF@}P!Hh$G+&BvxfqlZ?x)$`#IB|^wGwkR zCF_!EWq%V`T&);LQ1o3s2vn+6h!vmy(Fmrhf+jzIV;|U++s97O_gqish5Sz0dS}@; zm>=OtxZRNj*-k+En66RXZjSjx$KS)eM;IGIUNJ0^v2Ybf5;bD*h>QvBA^p%Mp4BW+ z@pmXl1vO`tja z_m%t{t|HJTD$rgzdY5G-fIKw{?KNP{{J`#2Mw=Lv13~M$ zo+sI6JuYTI`sG(4P7?zjXNdiQF~3GdCf+Yxt^J7EN=25jq&gg3-9%+5XpUo;eFjRB zg8}pHF-a0=g)5dJ2-&I4hJ?_?RZPxTM7TRR(um%Vp$kKh%UT~}&XRco$P z*Rew8Jx2T1k)LP)qhJW)$p)%Hb^6m^z-;(MVtz}Iol17?pdqvp+t3dTz7)c87q}T4 zOb(ie@LcA4M3uv0W=rFe9E!Y1^Z+V1Us_`XCk_cax(Y>tujyDZvnPgt)cle!2|O)? zyTuSao!mE+K$nOeUirv`fz}KzUkwC8-mwsq7K`Gzff`fsw909aNhZufXqVn(SHZ*i z#rwz7g`(FhF$BS`h%?i%{Z9_HL(;A`OV;Jg$2zK7R77opW2Ui6b~m+&%-1Ac6>_`7 ziI1a{Vo^#j2{m(7;SgTZvlLcxD?}STji`#GAsm5V1mXlhd&nW|%4`jKP^SrY6j7;% zyErj371!jc=n;b)9C?W<6{|vJ1{37Zwh$yGWcTf(mtaN>fwONn%%)qMIUZZT}$7n>Q=W$DE2x6wfkb8G(>h&6jg7z7-*BUF_ zq9O+_-HKmpDFDzZlUyF{wNyD|@Jt+uNj+w8Og4xWWOPR?ucrXVn#JEzvaea{KP6uW z4vi}n0iOcQdE=29qE1qfmd3)Yk<}E-=m0|(Kw_ti>^dS(*@mo7Ll7Z`wA&!21rEgv zF&fY|!bl_}04K%hsMHdO6#~tS9c5Iwf)E6%z!3!@Ybc^2xi6Q{P6FQpN{)`D-O#Yi z=Fqz-s$vnu6QApMz8s0r(3v>GuXYHF&3_=UAhVjn4e9Sua=Rq6hB&SWB8(EqBNTJA zE=3gqGklx?oftT8j*FE?@D&iU_?m?bjiMH9kKnBWLDPq*%fT$6G<7n{9hV@In8;5j zs$tA&acmR;QF~)hbv%h)jLm2UPaFh;^a1sv3g44oMHW-21CN!HDH|MPOuEFb7)_0Y z|1j`3VHREGkz^lw-eCPy<8ZSN#}Kuuf^H}9LQLq#jF7AP#W4TZ1V2R2F7WX|5A(h$ zyA~|h7nH!?yCH*S>^k{O_#ffX;_4pCOB?Yy` z83GVDpmweTLX75~(jz615j6RvpU__@qD=IdGfe3#jS%Fka!`l`?Cl><*u)UKkUnxP zQ59-&T$2Q0H&+UdUy;mREM~_)0>ub(`M=3?jDW6gugE+#zW;5y37lSt>DQ%-3FGzr zg{0C*UR444l=HDfAQ8CXAEiUAg+|pZUq@a?RC{H1Ta0y;rp(2zjKy!WRHYDz0LQTqbrNVgiClW|b+$sUkyY(I1cV;w=2&s9mgbwVW@fxz zBL=kTrU=_uD*LS4X~4*k58bRYCmDuRsu(gm^P)Io0Yvy>D@tcHqKmyn=SZZ0@bZ~) zqozt_?;NiY-R$mI?IWmHj|I#xP$tz2B$20?dD%E}i}-gwKGjd;scn~@?Yo_-0a}9n zBVytW;G|*gYiK$`*@f|X!rvnCJXVZ&6IO;wEZ%CS_bM7<)}|2qf}Wkh(q*m!wQZVV ztP#@mfa;&s8PzqU@ym>h>S}?8aU;IK{5eSoE*HQ6|F?lPKykPMd6}n}uBBL+;KwKQ zmC=P@R6!;`4lC9#r&YgC5}D(Oc@QuaGMk9Wq{Z^jV=~T&W%wozShPjJWJ1XE9t@N0 
z9f?M(#-Z^>{61~bth`ubgcr5T=pc!y{3NVg)Ppzk^<|Paq9*KIm)ZPoE_u_yId1*&C* zcQnN+qS+~W6RIOk5V%##N~`p~^}`4m!&Vt0gj6GE0}54|vmMuz%4kna&ySGuqn9jT zqRp(GfF%Kmr4;Z;jChr%0M^C?rpu!3WM0;3@+4HCV+}DVE|QPqWofPw@?fSdhSA32H{jQp9PTFY`&Iq7VSZK< zZ-R)srA6|yD#dp|UIjb=$1E}9Sq`QPr(CKOVgm8ncZfV$wBF0!!HSQWZ`YxZ2u-Ms ztQgD|cO?M`f?x4NHfJqM?SKfBi9ubJ>;tq$_4GZdk-g^C#Nc&_lu7Txx8gCtA9+hhVP0zi^L_1QdVT1rp#uR87A15 zX(lTMt3d6DdHBDevkGo zsxmUNKOv?j26E&+61Rx!G+YHbN#Q7Uo5m4NeMnOxp_eCi#qf)@2dO;C+k$S55f=!H zC?H_{3=!janyw2$Kx%y;q1FU}U#r^F)>oJ8y2|{WPE)4#pFHA$IR@y6|1`Z>F z1V*foeP%o+F@HBu5#qaNM$aTom56>acQu+j2?q)3Gk$ZzX&qH3Ii2_+%Hi|N!l89> zc#;Lk77@5$3B~gKPljyBp4`Z8#Ls90z!!mmMWLPd8uIuXnc0l7Rs>A)uiJ#O6OOFJ z-G&-XX_$RjpZ+LSp~$UTZeY8y28`CHG7~3Sf1<%oR3~$oL!zh%k}4#*=W#N%rR-;a<-z?J#6Ut)s!k8wr;YFv=DK1)K42|YCl zm7zLl~6cv<4pf$04b5S-7As`h^L^PBBEA*<0t@0nYqPS_VT5J>}~Aa8ZoQ@V+}X})uw-?N0vI!w zq9ss56Q4crw&8N%jFOV%WK6_c`8c#um-R|i50G_`6?jxXvk|e+Fh++D@W+SlG)jMv zo`>AFrA6jb@FGpQit3UH$WYJ@f}hl_2@fCcZAO%XCpWHH`U0>wA3T111fn96)H9we5bFTv4Y1{BCR2n>R?uq(aCUXE1GgGWWq|Oe{uly^tEt~X#0P1BB3YPaCLlf* zNywU@<~z z`dj57SEDjJ3C!u}O5H+0$c@hGkK6!@9-Wwk*%N(M?I)0n{y5&+Nr+$&E3cv<8B!F! 
zlYsPa7%EI5S$L5dLR?JazYTzNWo`X4{_Bk zvf_l=gh%^4pe$6HszZu0h-*I;%;-_0rbBcr>RLmDxf(Pt$$XCii3C)cpv2*3`(v4Q z158L;f`BxQjmb6&R&6Wz;Yt&}9%{b;N9l#(5c<}@e3#0~dQ6=#39X_mfU|AD;k%gM z(vChVUu|mQwMB)5T~$X)nQRJeqL~fjaHnRzlUZdl&lo2-*fxSH1WaQeo`65dJR5__ z*0|NBCP*kf9#zq1r6;4QCcckt)tBAFC$i7#C>TkL`L%dWNJIlxhC_HJc?ioN{!MBh zFw@pOkzVtC_)Ay~6U!68_FG4$7LCa3FcGn2kxpAK~(#`~oE(huB{L5nv9}qsKjEA18i21-&`$ zWHNsv=0HY*-N22X21Hw%N7SCt-_49QvvE3h1IyfH!gEu~R5<`Sf}p4&aTqIw#E9@U z1-f!5MRGSWi(?2u5z%U3wb=qn+O4USh6|Y49786*F#)e3fPzQEhJ;-dxUHmE4J&Z(%nQFg5nFN?0YKUhk68}=em7#-_Sb+X$iBPfelj&A*5!659 zG6X za4#XZaBOHIZNVIsw;JhN@UxN}Qa)yv_65u{vgaOy4mq#Qq&6kOXEM+{Moun8ExpX5e1* zTMAWP#imcgJJ82*OyLCWadH=g);^zz@HVroGYSD%C$jbt>o%$sNSY&W5a1()y=V%{ zu3%&!1yp}?qTY($0q^XQByVw#i-y_&pxxndU_MV7G>WhPZOYxztPVVo+6`dD{J`Tw z5G&I2LXd4;+LzKHK`IY*G9eH|y9u67{Th=IOKO51^Y&N*I7l?aU}Zg@^Uo0#iWj|` z0K5@Q-@d_*2}owqMk9Y~1mC~s*cV8w1?}R5ih*Yc;3ce=!-%WMdF)ja%zD$h4}|sR zi8PfT^1H}Bb~=L&BoLBUE^>4ogc>jqW}YKn>n^GU!YTY=^rV57I0y9NFbzSo(iWZ; zvM{eA@R2Re%ol|G8uQU){C1|unDsoRoo)iF0v{fP+ja5b;LG;THDo zN$AlDsEUxT!wS(Y>6pyCX2FftRpJT3s!=2+FEOn~Yz8h;2In9LnIbN6Dif*Nw3~0A zkXEPyjzPNN;YvcR?D=|C$L?emuog7?Qb~fOy2m+|JOVH>8N{mh_AT5U{zz zkzFAjs&nUCm`z+x1GIaa9S<U^2U$*|`HAkvOjB1S*VYhBDdXBudoNKrcC+9t56GDdB#3}xO^_Y?B5~W+L?(r@s!;AgV<(9BVgkZA0)i-ZAES|6OcOie zhpM@o{qscjNAS8LT76iOj*rA2ZsxiKaV7eDibIA% zGeArgkXmI7dmtN%hEOSVyu?n8rOG01@uuX|r z;Up8V5~9CRWH+TV2O_n{r0JN_cc0{i>1;?~oJOV`;y01K7NNLoz_m81=pJ@o(qoSc zv(Y8e7y_zTR@ke@F%Rk4tuy)V)cu4_35}#1OP~S?XWr3i_l!7!>=S%YUuJgekS`1K zqIe!t03GpnVLW3w$_``|znpv)DsdhBc`@~~98^hQ;nV`h+mW2AaQeu))H-edps}b< zQ=&56@dVf-UZlQ>F7z)I;Hk(s^qlP@Haes2zWk$xDK;Tx>m3)ptASIS`QD zj60SDUOC9`On5ntG+KZ^lbm2VOuCtQInD^k0;N3WcjM4j!UW-aAv9usP6yy*k3|zG zsf^t<0jn}MFi9|rKrBN=NoH@HIIH4Myh5{;DHxp}Nf1j$omuir`UZ#8!{%6Q+)#l{K zW@r$=MVag&csAN~aJrhhhj6N~?KR(LjhM6W57`#xfpC4%YuhKsDt+Kr}) z$&f$$RA~UUPJ-x#;`03=@+o}}?eainY3E%-+tBVLjZ-4)&9e~Ot>?>3rBbU1V1;&P zYnpuIEVc@`6i9EpBKu|%^g>Q~`a4nv(^H$tKSzoj0G3~{sn-mrz)pyeYvdRWyVz83 
z@>s;*!5jiAF)Xt0S*&-kSL^xXOeeYedA>PuvK|nNy_gEDh~C^4eNrnthH+-EZg9JH z9u4WWS7vU%y8lZP=TKlPnH#U{i$!sJ(K7va3M0Q zH{sv2Ul0_~177x0EU^GWyho=;?D)~yOs_#ABYRw6>!%_$c7233ptq-@1C#Mv_;}Qk zr~yZMp2cd=6S1)5JD^1Ids6QN3q&{28keg5cmxN=2WabiuEr~|BIKAOyri9u_H*WI zd3>Q`9}O_zRL$?q)cAww4;F|>J|OtHk;0RT1d4emPWu|r5?(RD7Sqf$bfg_>|G?_a z(*MCI`deTJ#lRxS>Da@t^+GH#FNi_j7?THzYGFlYvuE{5Y%oozWD_a$Y9>DnfgnI+ z4%;$YI0wE}mO6kQyqUy0m>80sA4|hNgl4~%W=P8XL^57wg8h|HgY%oyOnrYFz5#Qa z&x~X8ra_$*HU_dU&1D|D$|%5YI7N2ECrdQrjX5$5L730YvZj$QnICklnJN;&#~4iP z%@*@P3Ydx89H(IcRAf2!qWKdvVGcc?uKl~Pmxgk1n`~`?j93Z#%vi$UGyGiU?5vwk zr+2eav$T4MfzjVs?YD?c0!BB6$0Nl;(Sy?UnBN*thBXZ`TdqT^$o>Js6H=IaQ&O!F zj?gcqYS@pj)s!e^YMju>!LC{kv>*rBy=g;dHDw|6Bh5d96^kpOY#kW9u7^s!wDVh> z2Zj}Rwaz%qz4HU<&**mHU_!M&?1$6AgQMreFhn=TN-57Q$Sk`5hCouWlrW89j;~wTT*W! z--p0PVV{{!#=`D9m>k)9l06!O?w@2`VyuQ;XJQkguCqRmg_y7*>j@K8-7L2ZFVw(Z zi}FM~1Wr2DAU?USkpPNeBtyHOt`lWnTcY3S_zd=PC~o!15852VY&GKFIfBCH zsQKzH6IBA~1T$b2t4I;d%iqGh%WE@**;I9lUq&V4F^2hmsxh)xuLw3iQc&Xvu)ncr zs$}NAB($EuTnTScn27Rnfn`uzQokztJDWk^r+TbL03(;`a>&*%VWSCtE98=^IUdTA zeQ%Nv!u|w*Yr@-lvL=FbdrTv1Qbsq{LIn-B(#B))2if=&Y+C#VI-ZV?j*qs?!cs;{ zgy7Pl2GJ~;sL_qSky;AuNr6&tf6SL#v}54I1{#l9f)SlqbZ%#q=`2PRc1E!WI-_&W z;&gAPCONlDSARCjbgVOa)`2e6es&B;bP@TvOPNh)H7SK%y4nLv*{9BGGkUwI#@uD* zGiRf7j&-5d1IrS=IXgC&SV}L??M^fe6NQCKnY#~kC+7^4mAy-i59jtwsUOzWA6v@q zKhQI+eK=+zvCMQdcX`&PVaiju%>3qo<+Gj|*0=O7vs}nsG3U%MYCpCt;p+n{uKQ*< zwv*`QqJ>_Y=^Wi#*qz7@_U7lDGpz0HPD&Q~itEoYn~rrS&l>D2Z$B5eh3HAi7gknn zI%nKb*fXVeaAoaN=i>MF_M|lmcQl+i$3AteXIksv9XEe-ZrlN4dB$>KmDBXT=~&_N zth)zSHRilO{&es1?1zQbi|XGupETWb_uTa-E<*)N73p6`T)76 zWx0LxInxI@$BNeY?>@BoLe2-XPxq|}K5W1L!}<^AoEcivy8qDqpR|8)%~|r!_M`R( zKHv1g+>1qbcD#A$fv=zX;M!07?)=RK`-9(|`QW-G-wfS(``3pa{PCL)uE%t1JEQZq zM2)|gOBAnN${yYlJ^P|SuUy-moY${gaM5NuzIOSn!~H0H(a!1C_2%boW$wM0mr}fL zW$odu?9&$=87tSVYRr4c{MN<%oa5`(v>tva;oFM^xw`dhU!Reb7(Y~4V5?fc;fbW= znL|YtORF|)%eGFLKU7?IuFnq0%N>^?k2DkvwbPP?@^4 zdh_}0$#dQqDsMYj{lJ$`Oup``p^7C@HCr?@Q*80aD_7WRwi@@PAuj*H@p{B_tfE;)BGD>zVQQF-7~N6o7TGT<(vMxwC=g{GpBdF@$$`oJ6QL^ 
zm;0vQ{?*I3{1jDxKr<_2Y5a+WW9{_^jr%i}&paWbHka# zvt~WM@ubpZzv1oI_s`nD@1$4#z2VR2lAnC~mEFG9XRmtq^M{{&{aAiyz$tc(4Jc-{KYx>*|%QV)4KoJ37`14Kl{#Sd)i+3e!{0qCqFlAc(ncK zH3^@u-~QZ4=A+?Lwj~73B>$~44d~jhX+|jpzPU5DBFR%%v_*lYTR}H^~aseN!u~ldhVJ~&yh^P=s z;BfhZ1@Jr1QG<`=svSb)VKp{9T2o@j_goq6)4kD=LTEg&J}pR;Zms2vrSu zye7X?+l1N4Ng8_v94lW?hFgfjiurgoR;o=TDhrEnBT-gb2gUyCJY27-6SViwtt_gE zG*X=(>0otDDU6{Pk6(lTv$GphCsdopK%e2Fyie^{9Gf;DHdN(D6^_Z*6l-&%i^t{> zHZotAACpJf^u?$!HiynN6heW%)R-4NfByO}bq`yDyq9&q zT+1HQByV{QPu_JJkLdm%jsIIs?~00F51M91HN6qv6xnG7yHg@NXd^FREaWP>EN1*P_PkbHb|S{JsQ}VACQ}1 z_dFCG3t>Nq7NdE4&HVZETie>&SYiyu&&Z>-@s03pMQ?8}ff)>$b{wK% zH~`;!F*?f*S4(Yecp^sQS$TH2x3)pS0(U&>;&k&hROInyiLy@_48AG+sf$J@fI2cscBtn*qNK`WB2P zAH^4I;|S=6b{iF<$C~j$icd_mYcn8iK1~?-MErIvq&-c*99s3-YxNZv#%B_=Hb&nG z<4Bw_-#Q&zfp<}3F+&#q0REt+*Tkor_=qM#j5QrIXm7+V*b1$ZmKaecC~F1uIr-gp ze_?e5?0lAkPzJbcd7tRmvqbXVGkEfkZ;V>rf10j;Ylmy2;!+OW6pD#tg6SoqIyr=W*qhrq=L1foo06Q3^^%&kxd&1{FwW~{_!I|5 zUnIsXr9bFQJYbki7_cc(mOx={0pYJPMQH-1L~dtwc}xMuoq}C1uv)in=DG=9<9{7L zmsF|2@XlozcAVgj@dEx(cU1q{rE7Az`Yz)O=(V$4dqAc^aolbh0y(YhTEAw0QfAx0!(l@=_Y z*kUszVz~$L-0_LJ+1!SjJYjn`cbGq7Y0>aPZ-yrEoN>(SMSRNQQ--ZOT*T~!o8s0G z{Dv{7Xx=jazA1f{c^gtL^tz%+!8GYOnYa$ah=J~0TZXRNrZeRC<`Q6Kxd+LlZW^mB zu*_RJr)zCoH+NxeqKVAySz1e5HI;d@mv?f-hHgXD>x5&ozZlbYFWW$p^uvfu));b$ z$pn3xJi9rn6Vq&tDmHw8u_#mM9;Q&&32+vW0{{zCRqj#(&tQL5Ef;z=Hdm$V0RR*NnF!O?#0A4 zhCYtG`9 z{|meS_X{;9{zGkP{NwoONvk3hZyc^Bjjkb@vcK^_M=0g|ZqU_)@vIrw}D?t_^ zHhkU=tzC?a^LV7SK{#?Z$Ri++fjkTHCdi8*`#?rOJ_LCWc+xxYc^k-2 zAU}fq6=VmaQZWTTM1zbI{;lnEm@#>@+ho zzc+d*Yp6Fb@BDCnO!kynM=!D~Jp`|f8QboO>Ro$C%pjtoSEOXmApH8S{4MY2<@5H^ z8E^Nro9#qb#+TXaPA@Z5nG&KNd|1;y*S2~sZ_F|&(b)SHgzti5A>7T zVqw{7ZU^V)yL*yM3$mkD@aNiV+Lw2pqW#?r-pwcFdTeu699>V0UvAQ$6*Ki_-rhS8 zLd57qTXi-;EnWKuqW#9S-j&$em6nOvOM6~9ZCcwu{>X+-Prk0|*cQWYY~)qgLN2L% z>k5H>Y~d6lHt|x%svA-FvaJRWeYfENx7U+_4IWxp8I`>x_N5((?Iz3G(>t6k^`-a0 z>fTb1;W1}l@#I9sc8;N=x^3pctk*MgH0wH#il2Q(K%}{ZIWLV_m|H`eSByEUJ2{2V zecwP_8-=fa_%q^K-6MvBqM;k>%p2k^Vm4i$%bhVkd->^e%ZIcAY7@9~RI}dek>DELaO066(KfvGg 
zD$R6LkJDQ-r`BW9D+ZH1N-*AjV6sWOWy-Qfx5u)TzLCI)B#UE(WZYG6cBFn{$m+56 z&s#d~6{AM;he_@+a}!n(1uHU))SY*7hsk%c`_~`Hpf^N!?TqfsAhLUxofTV56dxCr z5p|-tYjyN15V-NFTV5%81YZ8&+*_7sXRQ#I6RV1s8WtMQ%}jb=QT`T2gGH~zqcU?X z{U-_sGdzzNrcTb%k>;pF8{4zG^Y7crar^37gkxJy|HhW~R?Un!Fy#Y#ouLzS|0`+C zN)96*U%7mlzTOh`QW}<#ows^z^jQM8^haWEK=d8mQU5^nn7Ye|t~h^&^Ydo*SbXwd zzQmKyeiDK6UoY|hu<-vaoNpNu{|Urh*`sKF8rQH9luL}DTmqobB*481kt0_{j*NpN zW{~k9NgxwJI1m$v1%wB&f~0^<0Z9gl2Qh+72AK+y0eF@=I!+8ryYlznMrRpUPMQeg zl~0jj{rV`J{O9P5>&@&nZQbN5C%0mq>l;?Coq1Xu8{dC;%I?eqrx;&fka9iLYop@` z%fpm2-|+hrvv+KXH_9jbUZPzm2ez**95g<(!9K;5w{q#HkJx*~{0Z5;9&}%8xvOXV zVN=}8v)7wvFXuZ?WtJbwTyx{F(OxsVCwsDS>ZvN2Wd5oA|M1nVA`zHe9=am=JxK&wDe3 zrY;}OfzhGjq?n*G|knVOn-Z?&S}Z;*NA@g3kF^b-CeF{*~@UJrl$; zsr$Cn{Z=@YT3MVrcH`2jYTFa8(}FhhA=?z;olnoqpbQ03n~zWzZx$fF&oivfx%$l1 z(w$u|%Qlj~LEN2L__m>(KH4yKnf0u@<-F;@8GdH(1-2cFPg*l+=MDh-0kZZuzjeI3 zrhEkvvU#;LD<7b>d)L;_UX%7x{p=n4s!MOEGB{52AMd?ztw8^ z@L~7<{JD2^?JIAd-2Iwq3ZboC$&e*IU*_?I;e79UKK%{awku+_$p`NG#abRCG+kjx zK1A$6ee$Z_sN^4eqF@dG+N}SBvH!a@JpMN`F;jHprB|;QPhs(tHR>`OW5r4>(22_j zA{4inD%@o}N@0N%G)8o;%CPLX!YQM#$Dj}~gkT{3C;ifb$ z@$%POM$bwAa&+|Tt%*M1AK~)E2oISEw~SIyxBTnT%MINU47ZG44DXk^RQKrh@Za~e zg#YV=>Rbf>Lq_B|UU+lgEe1t+Hf*<0ZcF3-(N<(p{kpB6kN)?z^)uV@>-K(m=D)Yi ze@-WV-SV%`|4aL@8+TTAgHVb0H#&?>5-qQfevdw7^n8&Zv*JsW2IP2!avjw^4@^z7Cv@NKc*}R~>ywP2#v|YJ3 z)9F><*;QBWpPA{bx6Q1dQ8m+5aQS{`R`KG7y7G+6&(Boc1^MudDo>Y-;{-^%b7Swv2)kXiC{h8;SQQqP%P+FbxXuDpqt~St+KRYz4HyB5eVrz9} zDH&qj?9gbxeKrrwfyb3OJ433sDUq=j`D#OAU3qH*=%v&y$T0Sxt5Er+E?XCPTa*Pe z=3PEtSDUe*uFB&sa75-fVn#q zhAdA;Nr4htkmXTa;pWkHTm|`HQ~4dgoHMa*W=8pd0FN5QC$QZU@qbQ zj0F)}DuDSfXrEnbkJ#!zY^XNt=eFDBpWAKlDvGn9s__?gSl>{m_{73_soyGbp5dHP6q#qxTYJj_ zU$MvKYaU&HU?cFkxYZpV)pu4yeR0RAokeVFk)xw@kq|1g7lY0P`5%33P+Y$97H3xW z=rhXX>~^?LZUn3WoDzh>EA|<|mruZwf~7C?oQ56n}&c0KFXe=CDVA zPq2-G*67E`S3xKPu|;$xD3x&O=g_jyI5Hx-T^9sM}^$sV~j-~F>42Nw9s zT7F~j3s~SD-iu}Gj-qG))*UvAQ-67UjMRYlOP>qad_RsxuPQy>T~A>sfcjs4e6^XC6s{fzd8_TR g^ie0rU2$UEW%o8-fh^@;{$YQF7mh07e~0}4056XV7ytkO literal 0 HcmV?d00001 diff --git 
a/tests/common/storages/samples/sample.txt b/tests/common/storages/samples/sample.txt new file mode 100644 index 0000000000..fd1ed111cb --- /dev/null +++ b/tests/common/storages/samples/sample.txt @@ -0,0 +1 @@ +dlthub content \ No newline at end of file diff --git a/tests/common/storages/test_local_filesystem.py b/tests/common/storages/test_local_filesystem.py new file mode 100644 index 0000000000..a8cdc96458 --- /dev/null +++ b/tests/common/storages/test_local_filesystem.py @@ -0,0 +1,31 @@ +import os +import itertools +import pytest + + + +from dlt.common.storages import fsspec_from_config, FilesystemConfiguration +from dlt.common.storages.fsspec_filesystem import glob_files + +from tests.common.storages.utils import assert_sample_files + +TEST_SAMPLE_FILES = "tests/common/storages/samples" + + +@pytest.mark.parametrize("bucket_url,load_content", itertools.product(["file:///", "/", ""], [True, False])) +def test_filesystem_dict_local(bucket_url: str, load_content: bool) -> None: + if bucket_url in ["file://", ""]: + # relative paths + bucket_url += TEST_SAMPLE_FILES + else: + bucket_url += os.path.abspath(TEST_SAMPLE_FILES)[1:] + + print(bucket_url) + config = FilesystemConfiguration(bucket_url=bucket_url) + filesystem, _ = fsspec_from_config(config) + # use glob to get data + try: + all_file_items = list(glob_files(filesystem, bucket_url)) + assert_sample_files(all_file_items, filesystem, config, load_content) + except NotImplementedError as ex: + pytest.skip("Skipping due to " + str(ex)) diff --git a/tests/common/storages/utils.py b/tests/common/storages/utils.py new file mode 100644 index 0000000000..14fffb668d --- /dev/null +++ b/tests/common/storages/utils.py @@ -0,0 +1,57 @@ +from typing import List +from fsspec import AbstractFileSystem +import pandas +from pyarrow import parquet + +from dlt.common import pendulum +from dlt.common.storages import FilesystemConfiguration +from dlt.common.storages.fsspec_filesystem import FileItem, FileItemDict + + +def 
assert_sample_files(all_file_items: List[FileItem], filesystem: AbstractFileSystem, config: FilesystemConfiguration, load_content: bool) -> None: + for item in all_file_items: + assert isinstance(item["file_name"], str) + assert item["file_url"].endswith(item["file_name"]) + assert item["file_url"].startswith(config.protocol) + assert isinstance(item["mime_type"], str) + assert isinstance(item["size_in_bytes"], int) + assert isinstance(item["modification_date"], pendulum.DateTime) + content = filesystem.read_bytes(item["file_url"]) + assert len(content) == item["size_in_bytes"] + if load_content: + item["file_content"] = content + + # create file dict + file_dict = FileItemDict(item, config.credentials) + dict_content = file_dict.read_bytes() + assert content == dict_content + with file_dict.open() as f: + assert content == f.read() + # read via various readers + if item["mime_type"] == "text/csv": + with file_dict.open() as f: + df = pandas.read_csv(f, header="infer") + assert len(df.to_dict(orient="records")) > 0 + if item["mime_type"] == "application/parquet": + with file_dict.open() as f: + table = parquet.ParquetFile(f).read() + assert len(table.to_pylist()) + if item["mime_type"].startswith("text"): + with file_dict.open(mode="rt") as f_txt: + lines = f_txt.readlines() + assert len(lines) >= 1 + assert isinstance(lines[0], str) + + assert len(all_file_items) == 10 + assert [item["file_name"] for item in all_file_items] == [ + 'csv/freshman_kgs.csv', + 'csv/freshman_lbs.csv', + 'csv/mlb_players.csv', + 'csv/mlb_teams_2012.csv', + 'jsonl/mlb_players.jsonl', + 'met_csv/A801/A881_20230920.csv', + 'met_csv/A803/A803_20230919.csv', + 'met_csv/A803/A803_20230920.csv', + 'parquet/mlb_players.parquet', + 'sample.txt' + ] \ No newline at end of file diff --git a/tests/load/filesystem/test_filesystem_common.py b/tests/load/filesystem/test_filesystem_common.py index 2e002e548b..6e1fe5c549 100644 --- a/tests/load/filesystem/test_filesystem_common.py +++ 
b/tests/load/filesystem/test_filesystem_common.py @@ -7,10 +7,11 @@ from dlt.common import pendulum from dlt.common.configuration.specs import AzureCredentials, AzureCredentialsWithoutDefaults from dlt.common.storages import fsspec_from_config, FilesystemConfiguration -from dlt.common.storages.fsspec_filesystem import MTIME_DISPATCH +from dlt.common.storages.fsspec_filesystem import MTIME_DISPATCH, glob_files from dlt.common.utils import uniq_id from tests.utils import preserve_environ, autouse_test_storage +from tests.common.storages.utils import assert_sample_files @with_config(spec=FilesystemConfiguration, sections=("destination", "filesystem")) @@ -46,4 +47,20 @@ def test_filesystem_instance(all_buckets_env: str) -> None: # print(MTIME_DISPATCH[config.protocol](details)) assert (MTIME_DISPATCH[config.protocol](details) - now).seconds < 60 finally: - filesystem.rm(file_url) \ No newline at end of file + filesystem.rm(file_url) + + +@pytest.mark.parametrize("load_content", (True, False)) +def test_filesystem_dict(all_buckets_env: str, load_content: bool) -> None: + bucket_url = os.environ['DESTINATION__FILESYSTEM__BUCKET_URL'] + config = get_config() + if config.protocol in ["memory", "file"]: + pytest.skip(f"{config.protocol} not supported in this test") + glob_folder = "standard_source" + filesystem, _ = fsspec_from_config(config) + # use glob to get data + try: + all_file_items = list(glob_files(filesystem, posixpath.join(bucket_url, glob_folder, "samples"))) + assert_sample_files(all_file_items, filesystem, config, load_content) + except NotImplementedError as ex: + pytest.skip("Skipping due to " + str(ex)) From b9cb2f2e18708f281e0d89cb033433cb01b31bb6 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 9 Oct 2023 22:45:28 +0200 Subject: [PATCH 14/15] fixes glob pattern --- dlt/common/storages/fsspec_filesystem.py | 2 +- tests/common/storages/utils.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/dlt/common/storages/fsspec_filesystem.py b/dlt/common/storages/fsspec_filesystem.py index 155b111f1e..93d3d37bbc 100644 --- a/dlt/common/storages/fsspec_filesystem.py +++ b/dlt/common/storages/fsspec_filesystem.py @@ -178,7 +178,7 @@ def guess_mime_type(file_name: str) -> str: def glob_files( - fs_client: AbstractFileSystem, bucket_url: str, file_glob: str = "**/*" + fs_client: AbstractFileSystem, bucket_url: str, file_glob: str = "**" ) -> Iterator[FileItem]: """Get the files from the filesystem client. diff --git a/tests/common/storages/utils.py b/tests/common/storages/utils.py index 14fffb668d..6900c6fdcf 100644 --- a/tests/common/storages/utils.py +++ b/tests/common/storages/utils.py @@ -43,7 +43,7 @@ def assert_sample_files(all_file_items: List[FileItem], filesystem: AbstractFile assert isinstance(lines[0], str) assert len(all_file_items) == 10 - assert [item["file_name"] for item in all_file_items] == [ + assert set([item["file_name"] for item in all_file_items]) == { 'csv/freshman_kgs.csv', 'csv/freshman_lbs.csv', 'csv/mlb_players.csv', @@ -54,4 +54,4 @@ def assert_sample_files(all_file_items: List[FileItem], filesystem: AbstractFile 'met_csv/A803/A803_20230920.csv', 'parquet/mlb_players.parquet', 'sample.txt' - ] \ No newline at end of file + } \ No newline at end of file From 321be274659cd7c64e7f649ed9997a83ca162a3a Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 10 Oct 2023 00:13:12 +0200 Subject: [PATCH 15/15] fixes globbing on windows --- dlt/common/storages/configuration.py | 7 ++++++- dlt/common/storages/fsspec_filesystem.py | 7 ++++++- tests/common/storages/test_local_filesystem.py | 13 +++++++------ 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/dlt/common/storages/configuration.py b/dlt/common/storages/configuration.py index 8931a461d0..699465ce4a 100644 --- a/dlt/common/storages/configuration.py +++ b/dlt/common/storages/configuration.py @@ -1,3 +1,4 @@ +import os from urllib.parse import urlparse from typing import 
TYPE_CHECKING, Any, Literal, Optional, Type, get_args, ClassVar, Dict, Union @@ -74,7 +75,11 @@ class FilesystemConfiguration(BaseConfiguration): def protocol(self) -> str: """`bucket_url` protocol""" url = urlparse(self.bucket_url) - return url.scheme or "file" + # this prevents windows absolute paths to be recognized as schemas + if not url.scheme or (os.path.isabs(self.bucket_url) and "\\" in self.bucket_url): + return "file" + else: + return url.scheme def on_resolved(self) -> None: url = urlparse(self.bucket_url) diff --git a/dlt/common/storages/fsspec_filesystem.py b/dlt/common/storages/fsspec_filesystem.py index 93d3d37bbc..c084fcc12e 100644 --- a/dlt/common/storages/fsspec_filesystem.py +++ b/dlt/common/storages/fsspec_filesystem.py @@ -190,8 +190,10 @@ def glob_files( Returns: Iterable[FileItem]: The list of files. """ + import os bucket_url_parsed = urlparse(bucket_url) - if not bucket_url_parsed.scheme: + # if this is file path without scheme + if not bucket_url_parsed.scheme or (os.path.isabs(bucket_url) and "\\" in bucket_url): # this is a file so create a proper file url bucket_url = pathlib.Path(bucket_url).absolute().as_uri() bucket_url_parsed = urlparse(bucket_url) @@ -207,6 +209,9 @@ def glob_files( for file, md in glob_result.items(): if md["type"] != "file": continue + # make that absolute path on a file:// + if bucket_url_parsed.scheme == "file" and not file.startswith("/"): + file = "/" + file file_name = posixpath.relpath(file, bucket_path) file_url = bucket_url_parsed.scheme + "://" + file yield FileItem( diff --git a/tests/common/storages/test_local_filesystem.py b/tests/common/storages/test_local_filesystem.py index a8cdc96458..e9550a3173 100644 --- a/tests/common/storages/test_local_filesystem.py +++ b/tests/common/storages/test_local_filesystem.py @@ -1,8 +1,7 @@ import os import itertools import pytest - - +import pathlib from dlt.common.storages import fsspec_from_config, FilesystemConfiguration from 
dlt.common.storages.fsspec_filesystem import glob_files @@ -14,13 +13,15 @@ @pytest.mark.parametrize("bucket_url,load_content", itertools.product(["file:///", "/", ""], [True, False])) def test_filesystem_dict_local(bucket_url: str, load_content: bool) -> None: - if bucket_url in ["file://", ""]: + if bucket_url in [""]: # relative paths - bucket_url += TEST_SAMPLE_FILES + bucket_url = TEST_SAMPLE_FILES else: - bucket_url += os.path.abspath(TEST_SAMPLE_FILES)[1:] + if bucket_url == "/": + bucket_url = os.path.abspath(TEST_SAMPLE_FILES) + else: + bucket_url = pathlib.Path(TEST_SAMPLE_FILES).absolute().as_uri() - print(bucket_url) config = FilesystemConfiguration(bucket_url=bucket_url) filesystem, _ = fsspec_from_config(config) # use glob to get data