"""Usage examples for the RSS reader and CSV helper utilities.

Each ``example_*`` function below is a self-contained demo step. Only
``example_writer()`` is invoked when this file is executed; the remaining
calls are left commented out so they can be enabled one at a time.
"""
import sys
import os

import pandas as pd

# Make the sibling ``../src`` directory (relative to this file) importable
# before any of the project modules below are loaded.
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src"))
)

from dotenv import load_dotenv

# Loaded ahead of the project imports; presumably so those modules can read
# the values from ``.env`` -- verify against the modules themselves.
load_dotenv(".env")

import lib.custom_logger as get_logger

logger = get_logger.get_logger(level=10)

from models.csv_rss_item import RSSItem
from lib.rss_reader_client import RSSReaderClient
from lib.csv_collector import CSVWriter, CSVReader, CSVEditMapper, CSVAnalyzer
from utils.translate_deepl import DeepLTranslateClient


def example_fetch():
    """Fetch the OpenAI news feed with a fixed ``from_at`` and log the item count."""
    items = RSSReaderClient.fetch(
        "https://openai.com/news/rss.xml",
        from_at="2025-09-12 21:00:00+09:00",
    )
    logger.info(f"Fetched {len(items)} items")


# example_fetch()


def example_writer():
    """Fetch the OpenAI news feed and hand it to ``CSVWriter.write``.

    The fetched items are converted with ``RSSItem.to_csv_from_items`` and
    written with the ``tech/ai`` / ``bronze`` / ``openai_news`` settings.
    """
    feed_url = "https://openai.com/news/rss.xml"
    # Keyword order matches the original call: ``records`` first, then these.
    write_options = {
        "domain": "tech/ai",
        "layer": "bronze",
        "event": "openai_news",
        "is_year": True,
        "is_month": True,
        "part": 1,
    }
    CSVWriter.write(
        records=RSSItem.to_csv_from_items(RSSReaderClient.fetch(feed_url)),
        **write_options,
    )


example_writer()


def example_reader():
    """Read the bronze CSV, add a Japanese ``text_ja`` column, and write edit01.

    The output keeps ``uid``, ``title``, ``link`` and ``summary``, adds
    ``text_ja`` (title and summary translated from English to Japanese via
    ``DeepLTranslateClient``), and adds ``published_at`` configured with
    ``key_name="published_parsed"``.
    """
    # The translation client is created first, before any file is read.
    client = DeepLTranslateClient()

    source_path = "data/tech/ai/bronze/y=2025/m=09/openai_news_2025-09-15_part-001.csv"
    target_path = "data/tech/ai/silver_work/y=2025/m=09/openai_news_2025-09-15_part-001_edit01.csv"

    data = CSVReader.read(source_path)
    # The header map is built from the first record of the file.
    header_map = CSVReader.header_map(data[0])
    logger.info(f"header_map: {header_map}")

    mapper = CSVEditMapper(header_map=header_map)
    for column in ("uid", "title", "link", "summary"):
        mapper.add_column(column)

    def call_back_text_ja(row_idx: int, row: list, header_map: dict) -> str:
        """Return the row's title and summary, joined by a blank line, in Japanese."""
        title = mapper.get_column_values("title", row)
        summary = mapper.get_column_values("summary", row)
        return client.translate(
            f"{title}\n\n{summary}", from_lang="en", to_lang="ja"
        )

    mapper.add_callback("text_ja", call_back_text_ja)
    mapper.add_column("published_at", key_name="published_parsed")

    CSVWriter.write_with_filename(
        records=mapper.edit(data),
        filename=target_path,
        is_update=False,
    )


# example_reader()


def example_reader2():
    """Read edit01, add a constant ``created_at`` value, and write edit02.

    ``mapper.auto_columns()`` is called before the extra value is added.
    """
    source_path = "data/tech/ai/silver_work/y=2025/m=09/openai_news_2025-09-15_part-001_edit01.csv"
    target_path = "data/tech/ai/silver_work/y=2025/m=09/openai_news_2025-09-15_part-001_edit02.csv"

    data = CSVReader.read(source_path)
    header_map = CSVReader.header_map(data[0])
    logger.info(f"header_map: {header_map}")

    mapper = CSVEditMapper(header_map=header_map)
    mapper.auto_columns()
    mapper.add_value("created_at", value="2025-09-15 00:00:00+00:00")

    CSVWriter.write_with_filename(
        records=mapper.edit(data),
        filename=target_path,
        is_update=False,
    )


# example_reader2()


def example_edit_priod():
    """Read edit02 and pass it to ``CSVAnalyzer.write_separated_month``.

    The call uses the ``tech/ai`` domain, ``silver`` layer and
    ``openai_news`` event.
    """
    source_path = "data/tech/ai/silver_work/y=2025/m=09/openai_news_2025-09-15_part-001_edit02.csv"
    CSVAnalyzer.write_separated_month(
        CSVReader.read(source_path),
        domain="tech/ai",
        layer="silver",
        event="openai_news",
    )


# example_edit_priod()