Validates and abstracts access to data sources.
Source code in ddataflow/data_sources.py
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
class DataSources:
    """
    Registry of the configured data sources.

    One DataSource object is created per entry of the supplied
    configuration; the remaining methods are lookups by data source name.
    """

    def __init__(
        self, *, config, local_folder: str, snapshot_path: str, size_limit: int
    ):
        """
        Build a DataSource for every entry found in ``config``.

        All arguments are keyword-only. ``config`` is iterated with
        ``.items()`` and later indexed by data source name; the other
        three arguments are forwarded unchanged to every DataSource.
        """
        self.config = config
        # Keyed by data source name, in the iteration order of the config.
        self.data_source: Dict[str, Any] = {
            source_name: DataSource(
                name=source_name,
                config=source_config,
                local_data_folder=local_folder,
                snapshot_path=snapshot_path,
                size_limit=size_limit,
            )
            for source_name, source_config in self.config.items()
        }
        self.download_folder = local_folder

    def all_data_sources_names(self) -> List[str]:
        """Return the names of all registered data sources as a new list."""
        return [*self.data_source]

    def get_data_source(self, name) -> DataSource:
        """
        Return the DataSource registered under ``name``.

        Raises an Exception when no data source with that name exists.
        """
        if name in self.data_source:
            return self.data_source[name]
        raise Exception(f"Data source does not exist {name}")

    def get_filter(self, data_source_name: str):
        """Return the ``"query"`` entry of the named data source's config."""
        source_config = self.config[data_source_name]
        return source_config["query"]

    def get_parquet_name(self, data_source_name: str):
        """Return the ``"parquet_name"`` entry of the named data source's config."""
        source_config = self.config[data_source_name]
        return source_config["parquet_name"]
|