Skip to content

DataSources

DataSources

Validates and abstracts access to data sources.

Source code in ddataflow/data_sources.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
class DataSources:
    """
    Validates and abstracts access to the configured data sources.

    Wraps every entry of the configuration in a ``DataSource`` object and
    offers name-based lookups for those objects and for selected raw
    configuration values.
    """

    def __init__(
        self, *, config, local_folder: str, snapshot_path: str, size_limit: int
    ):
        """
        Build one ``DataSource`` per configuration entry.

        All arguments are keyword-only.

        :param config: mapping of data source name to that source's
            configuration; it is stored as-is and iterated with ``.items()``.
        :param local_folder: stored as ``download_folder`` and forwarded to
            each ``DataSource`` as ``local_data_folder``.
        :param snapshot_path: forwarded unchanged to each ``DataSource``.
        :param size_limit: forwarded unchanged to each ``DataSource``.
        """
        self.config = config
        self.download_folder = local_folder
        # Registry of wrapped sources, keyed by the name used in the config.
        self.data_source: Dict[str, Any] = {
            source_name: DataSource(
                name=source_name,
                config=source_config,
                local_data_folder=local_folder,
                snapshot_path=snapshot_path,
                size_limit=size_limit,
            )
            for source_name, source_config in self.config.items()
        }

    def all_data_sources_names(self) -> List[str]:
        """Return the names of all registered data sources as a new list."""
        return list(self.data_source)

    def get_data_source(self, name) -> DataSource:
        """
        Return the registered data source called ``name``.

        :raises Exception: if no data source is registered under ``name``.
        """
        if name in self.data_source:
            return self.data_source[name]
        raise Exception(f"Data source does not exist {name}")

    def get_filter(self, data_source_name: str):
        """
        Return the ``"query"`` entry of the named source's configuration.

        No existence check is made here: a missing name or key propagates
        the lookup error of the config object (``KeyError`` for a dict).
        """
        source_config = self.config[data_source_name]
        return source_config["query"]

    def get_parquet_name(self, data_source_name: str):
        """
        Return the ``"parquet_name"`` entry of the named source's configuration.

        No existence check is made here: a missing name or key propagates
        the lookup error of the config object (``KeyError`` for a dict).
        """
        source_config = self.config[data_source_name]
        return source_config["parquet_name"]