2025-09-16 16:49:54 +09:00

96 lines
3.2 KiB
Python

import sys
import os
import pandas as pd
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))
from dotenv import load_dotenv
load_dotenv(".env")
import lib.custom_logger as get_logger
logger = get_logger.get_logger(level=10)
from models.csv_rss_item import RSSItem
from lib.rss_reader_client import RSSReaderClient
from lib.csv_collector import CSVWriter,CSVReader,CSVEditMapper,CSVAnalyzer
from utils.translate_deepl import DeepLTranslateClient
def example_fetch():
    """Fetch the OpenAI news RSS feed and log how many items came back.

    The feed is requested through ``RSSReaderClient.fetch`` with a fixed
    ``from_at`` timestamp (presumably a lower bound on publication time --
    confirm against ``RSSReaderClient``). The item count is logged at INFO.
    """
    feed_url = "https://openai.com/news/rss.xml"
    fetched = RSSReaderClient.fetch(
        feed_url,
        from_at="2025-09-12 21:00:00+09:00",
    )
    item_count = len(fetched)
    logger.info(f"Fetched {item_count} items")
# example_fetch()
def example_writer():
    """Fetch the OpenAI news RSS feed and write it out via ``CSVWriter.write``.

    The fetched items are converted with ``RSSItem.to_csv_from_items`` and
    handed to the writer together with the domain/layer/event identifiers and
    the year/month/part options used for this example.
    """
    fetched = RSSReaderClient.fetch("https://openai.com/news/rss.xml")
    rows = RSSItem.to_csv_from_items(fetched)

    # Destination options, kept together so the write call reads as one unit.
    write_options = {
        "domain": "tech/ai",
        "layer": "bronze",
        "event": "openai_news",
        "is_year": True,
        "is_month": True,
        "part": 1,
    }
    CSVWriter.write(records=rows, **write_options)
# Runs the writer example whenever this module is executed or imported;
# the other example calls in this file are commented out.
example_writer()
def example_reader():
    """Read the bronze CSV, add a translated ``text_ja`` column, and save it.

    Steps:
      1. Create the DeepL client and read the source CSV.
      2. Build a ``CSVEditMapper`` from the header map of the first row.
      3. Register the ``uid``, ``title``, ``link`` and ``summary`` columns,
         a ``text_ja`` callback column, and ``published_at`` (taken from the
         ``published_parsed`` key).
      4. Write the edited records to the ``silver_work`` ``_edit01`` file.
    """
    translator = DeepLTranslateClient()
    source_path = "data/tech/ai/bronze/y=2025/m=09/openai_news_2025-09-15_part-001.csv"
    rows = CSVReader.read(source_path)

    columns = CSVReader.header_map(rows[0])
    logger.info(f"header_map: {columns}")
    mapper = CSVEditMapper(header_map=columns)

    # Registration order is preserved: plain columns first, then the callback.
    for column_name in ("uid", "title", "link", "summary"):
        mapper.add_column(column_name)

    def call_back_text_ja(row_idx: int, row: list, header_map: dict) -> str:
        """Translate the row's title and summary (blank line between) to Japanese."""
        heading = mapper.get_column_values("title", row)
        body = mapper.get_column_values("summary", row)
        return translator.translate(
            f"{heading}\n\n{body}",
            from_lang="en",
            to_lang="ja",
        )

    mapper.add_callback("text_ja", call_back_text_ja)
    mapper.add_column("published_at", key_name="published_parsed")

    CSVWriter.write_with_filename(
        records=mapper.edit(rows),
        filename="data/tech/ai/silver_work/y=2025/m=09/openai_news_2025-09-15_part-001_edit01.csv",
        is_update=False,
    )
# example_reader()
def example_reader2():
    """Re-read the ``_edit01`` CSV, add a fixed ``created_at`` value, and save it.

    The mapper is built from the header map of the first row, set up with
    ``auto_columns()``, and given a constant ``created_at`` value. The edited
    records are written to the ``_edit02`` file.
    """
    source_path = "data/tech/ai/silver_work/y=2025/m=09/openai_news_2025-09-15_part-001_edit01.csv"
    rows = CSVReader.read(source_path)

    columns = CSVReader.header_map(rows[0])
    logger.info(f"header_map: {columns}")

    mapper = CSVEditMapper(header_map=columns)
    mapper.auto_columns()
    mapper.add_value("created_at", value="2025-09-15 00:00:00+00:00")

    CSVWriter.write_with_filename(
        records=mapper.edit(rows),
        filename="data/tech/ai/silver_work/y=2025/m=09/openai_news_2025-09-15_part-001_edit02.csv",
        is_update=False,
    )
# example_reader2()
def example_edit_priod():
    """Read the ``_edit02`` CSV and pass it to ``CSVAnalyzer.write_separated_month``.

    The records are forwarded with the ``tech/ai`` domain, ``silver`` layer
    and ``openai_news`` event identifiers.
    """
    source_path = "data/tech/ai/silver_work/y=2025/m=09/openai_news_2025-09-15_part-001_edit02.csv"
    CSVAnalyzer.write_separated_month(
        CSVReader.read(source_path),
        domain="tech/ai",
        layer="silver",
        event="openai_news",
    )
# example_edit_priod()