collect-data-pipeline/examples/example_duckdb.py
2025-09-16 16:49:54 +09:00

30 lines
786 B
Python

import sys
import os
# Add the sibling "../src" directory (resolved relative to this file) to the
# import search path. This must run before the project imports below;
# presumably "src" is where the `lib` and `providers` packages live — confirm.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))
# The module is aliased as `get_logger`, which is why the factory call below
# reads `get_logger.get_logger(...)`.
import lib.custom_logger as get_logger
# NOTE(review): level=10 presumably corresponds to logging.DEBUG — confirm
# against lib.custom_logger.
logger = get_logger.get_logger(level=10)
from providers.duck_db_provider import DuckDBProvider
def example_duckdb():
    """Query the bronze OpenAI-news CSV files and print the result.

    Logs a start message, asks a fresh ``DuckDBProvider`` for
    ``max_value`` of the ``published_parsed`` column over the CSV glob
    pattern below, and prints whatever the provider returns.

    NOTE(review): the glob is relative, so it is presumably resolved against
    the current working directory by the provider — confirm.
    """
    logger.info("Starting example_duckdb function.")

    # Glob covering every month partition of 2025.
    csv_glob = "data/tech/ai/bronze/y=2025/m=*/openai_news_*.csv"

    latest = DuckDBProvider().max_value(
        file_path=csv_glob,
        column="published_parsed",
    )
    print("latest published_parsed:", latest)
# Run the example only when this file is executed as a script, so that merely
# importing the module (e.g. to reuse example_duckdb) does not trigger the query.
if __name__ == "__main__":
    example_duckdb()
# con.execute(f"CREATE TABLE IF NOT EXISTS data AS SELECT * FROM read_csv_auto('{file_path}')")
# logger.info("Table 'data' created successfully.")