collect-data-pipeline/examples/example_scraper.py
import sys
import os
import asyncio

# Make the sibling src/ package importable when this example is run directly.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))

# Load environment variables (e.g. API keys) before importing the provider.
from dotenv import load_dotenv

load_dotenv(".env")

from lib.custom_logger import get_logger

logger = get_logger(level=10)  # 10 == logging.DEBUG

from providers.scraper.anthropic_scraper_provider import AnthropicScraperProvider
from models.csv_scrape_item import ScrapeItem
from lib.csv_collector import CSVWriter

def example_scraper():
    """Scrape Anthropic news and write the results as a bronze-layer CSV."""
    client = AnthropicScraperProvider()
    items = client.crawl_sync()
    logger.info(f"Scraped {len(items)} items")

    csv_data = ScrapeItem.to_csv_from_items(items)
    CSVWriter.write(
        records=csv_data,
        domain="tech/ai",
        layer="bronze",
        event="anthropic_news",
        is_year=True,
        is_month=True,
        part=1,
    )
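
# A minimal async sketch of the same flow. Only crawl_sync() is demonstrated
# above; whether AnthropicScraperProvider also exposes a native async crawl()
# (e.g. over httpx.AsyncClient) is an assumption, so instead the blocking call
# is offloaded to a worker thread with asyncio.to_thread (Python 3.9+), which
# keeps the event loop free while the scrape runs.
async def example_scraper_async():
    client = AnthropicScraperProvider()
    items = await asyncio.to_thread(client.crawl_sync)
    logger.info(f"Scraped {len(items)} items (async)")
    return items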

if __name__ == "__main__":
    example_scraper()
    # To try the async sketch instead:
    # asyncio.run(example_scraper_async())