96 lines
3.2 KiB
Python
96 lines
3.2 KiB
Python
import sys
|
|
import os
|
|
import pandas as pd
|
|
# Put the sibling "../src" directory (resolved relative to this file) on the
# import path; presumably that is where the lib/, models/ and utils/ packages
# imported below live.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))
|
|
|
|
from dotenv import load_dotenv
|
|
# Load environment variables from a ".env" file before the project modules are
# imported. The path is relative, so it depends on the current working directory.
load_dotenv(".env")
|
|
|
|
import lib.custom_logger as get_logger
|
|
# Logger shared by every example in this file. level=10 equals logging.DEBUG in
# the stdlib numbering (assumes the custom get_logger follows it -- TODO confirm).
logger = get_logger.get_logger(level=10)
|
|
|
|
from models.csv_rss_item import RSSItem
|
|
from lib.rss_reader_client import RSSReaderClient
|
|
from lib.csv_collector import CSVWriter,CSVReader,CSVEditMapper,CSVAnalyzer
|
|
from utils.translate_deepl import DeepLTranslateClient
|
|
|
|
def example_fetch():
    """Fetch the OpenAI news RSS feed and log how many items were returned.

    The request passes ``from_at="2025-09-12 21:00:00+09:00"`` to
    ``RSSReaderClient.fetch`` (presumably a lower bound on the item
    timestamp -- confirm against the client).
    """
    fetched = RSSReaderClient.fetch(
        "https://openai.com/news/rss.xml",
        from_at="2025-09-12 21:00:00+09:00",
    )
    logger.info(f"Fetched {len(fetched)} items")
|
|
|
|
# example_fetch()
|
|
|
|
def example_writer():
    """Fetch the OpenAI news RSS feed and write it out through ``CSVWriter``.

    The fetched items are converted with ``RSSItem.to_csv_from_items`` and
    handed to ``CSVWriter.write`` with domain ``tech/ai``, layer ``bronze``,
    event ``openai_news``, year/month flags enabled and ``part=1``.
    """
    # Destination settings are kept together so the write call stays short.
    destination = {
        "domain": "tech/ai",
        "layer": "bronze",
        "event": "openai_news",
        "is_year": True,
        "is_month": True,
        "part": 1,
    }

    feed_items = RSSReaderClient.fetch("https://openai.com/news/rss.xml")
    rows = RSSItem.to_csv_from_items(feed_items)
    CSVWriter.write(records=rows, **destination)
|
|
# The only example that is currently enabled: it runs whenever this file is
# executed or imported. The other example calls in this file are commented out.
example_writer()
|
|
|
|
def example_reader():
    """Read the bronze CSV, add a Japanese translation column, and save it.

    Steps, in order:
      1. Read the bronze ``openai_news`` CSV and build a header map from its
         first row.
      2. Register the ``uid``, ``title``, ``link`` and ``summary`` columns.
      3. Register a ``text_ja`` callback that translates "title + blank line
         + summary" from English to Japanese with DeepL.
      4. Register ``published_at`` with ``key_name="published_parsed"``.
      5. Write the edited rows to the ``silver_work`` ``edit01`` file with
         ``is_update=False``.
    """
    translator = DeepLTranslateClient()

    source_path = "data/tech/ai/bronze/y=2025/m=09/openai_news_2025-09-15_part-001.csv"
    rows = CSVReader.read(source_path)
    columns = CSVReader.header_map(rows[0])
    logger.info(f"header_map: {columns}")

    mapper = CSVEditMapper(header_map=columns)
    for column_name in ("uid", "title", "link", "summary"):
        mapper.add_column(column_name)

    def call_back_text_ja(row_idx: int, row: list, header_map: dict) -> str:
        # Only ``row`` is used; the signature is kept as-is because the mapper
        # decides how the callback is invoked.
        heading = mapper.get_column_values("title", row)
        body = mapper.get_column_values("summary", row)
        return translator.translate(
            f"{heading}\n\n{body}", from_lang="en", to_lang="ja"
        )

    mapper.add_callback("text_ja", call_back_text_ja)
    mapper.add_column("published_at", key_name="published_parsed")

    translated_rows = mapper.edit(rows)
    CSVWriter.write_with_filename(
        records=translated_rows,
        filename="data/tech/ai/silver_work/y=2025/m=09/openai_news_2025-09-15_part-001_edit01.csv",
        is_update=False,
    )
|
|
|
|
# example_reader()
|
|
|
|
def example_reader2():
    """Second edit pass: add a constant ``created_at`` value to the edit01 CSV.

    Reads the ``edit01`` file, builds a header map from its first row, calls
    ``mapper.auto_columns()`` (presumably registers every existing column --
    confirm against ``CSVEditMapper``), adds ``created_at`` with the fixed
    value ``2025-09-15 00:00:00+00:00``, and writes the result to the
    ``edit02`` file with ``is_update=False``.
    """
    source_path = "data/tech/ai/silver_work/y=2025/m=09/openai_news_2025-09-15_part-001_edit01.csv"
    target_path = "data/tech/ai/silver_work/y=2025/m=09/openai_news_2025-09-15_part-001_edit02.csv"

    rows = CSVReader.read(source_path)
    columns = CSVReader.header_map(rows[0])
    logger.info(f"header_map: {columns}")

    mapper = CSVEditMapper(header_map=columns)
    mapper.auto_columns()
    mapper.add_value("created_at", value="2025-09-15 00:00:00+00:00")

    stamped_rows = mapper.edit(rows)
    CSVWriter.write_with_filename(
        records=stamped_rows,
        filename=target_path,
        is_update=False,
    )
|
|
|
|
# example_reader2()
|
|
|
|
def example_edit_priod():
    """Pass the edit02 CSV to ``CSVAnalyzer.write_separated_month``.

    The rows are read from the ``silver_work`` ``edit02`` file and passed on
    with domain ``tech/ai``, layer ``silver`` and event ``openai_news``
    (the method name suggests one output per month -- confirm against
    ``CSVAnalyzer``).
    """
    source_path = "data/tech/ai/silver_work/y=2025/m=09/openai_news_2025-09-15_part-001_edit02.csv"
    target = {"domain": "tech/ai", "layer": "silver", "event": "openai_news"}

    rows = CSVReader.read(source_path)
    CSVAnalyzer.write_separated_month(rows, **target)
|
|
|
|
# example_edit_priod()
|
|
|