37 lines
957 B
Python
37 lines
957 B
Python
import sys
|
|
import os
|
|
import pandas as pd
|
|
import asyncio
|
|
# Extend the import search path with "<directory of this file>/../src" so the
# project-local imports further down can resolve.
# NOTE(review): presumably lib/, providers/ and models/ live under src/ — confirm.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))
|
|
|
|
from dotenv import load_dotenv
|
|
# Load environment variables from the ".env" file before the project modules
# below are imported.
# NOTE(review): presumably those modules read configuration from the
# environment at import time — confirm before reordering.
load_dotenv(".env")
|
|
|
|
from lib.custom_logger import get_logger
|
|
# level=10 is the numeric value of logging.DEBUG in the standard library.
# NOTE(review): assumes get_logger follows the stdlib level numbering — confirm.
logger = get_logger(level=10)
|
|
|
|
from providers.scraper.anthropic_scraper_provider import AnthropicScraperProvider
|
|
from models.csv_scrape_item import ScrapeItem
|
|
from lib.csv_collector import CSVWriter
|
|
|
|
|
|
def example_scraper():
    """Run one synchronous scrape and hand the results to the CSV writer.

    Steps, in order:
      1. Build an ``AnthropicScraperProvider`` and call ``crawl_sync()``.
      2. Log the number of items that came back.
      3. Convert the items with ``ScrapeItem.to_csv_from_items``.
      4. Pass the converted records to ``CSVWriter.write`` together with
         the destination keyword arguments below.

    Returns:
        None. The function is used for its side effects only.
    """
    items = AnthropicScraperProvider().crawl_sync()
    logger.info(f"Scraped {len(items)} items")

    # Destination settings forwarded verbatim to CSVWriter.write; their exact
    # meaning is defined by CSVWriter, not by this script.
    destination = {
        "domain": "tech/ai",
        "layer": "bronze",
        "event": "anthropic_news",
        "is_year": True,
        "is_month": True,
        "part": 1,
    }
    CSVWriter.write(records=ScrapeItem.to_csv_from_items(items), **destination)
|
|
|
|
|
|
# async def run():
|
|
# async with httpx.AsyncClient() as client:
|
|
|
|
|
|
|
|
# Run the example only when this file is executed as a script, so importing
# the module does not trigger a scrape and a CSV write as a side effect.
if __name__ == "__main__":
    example_scraper()