python-common-code/example/example_duck_gcs.py
2025-10-26 17:10:27 +09:00

87 lines
2.8 KiB
Python

"""
HMACキーがひつようになる
* Google Cloud Consoleで発行する
* https://console.cloud.google.com/storage/settings
* 「相互運用性(Interoperability)」タブを開く
* 「HMACキーを作成」ボタンを押す
* 使いたいサービスアカウントを選択
"""
import sys
import os
# Add the sibling "../src" directory (resolved relative to this file) to the
# import search path. Presumably this is where the `lib` and `providers`
# packages imported below live -- TODO confirm against the repository layout.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),"..", "src")))
from dotenv import load_dotenv
# Load variables from a .env file into the process environment. The examples
# below read GCP_STORAGE_HMAC_ACCESS_KEY / GCP_STORAGE_HMAC_SECRET_KEY via
# os.getenv. This runs before the project imports that follow.
load_dotenv()
from lib.custom_logger import get_logger
# NOTE(review): 10 is the numeric value of logging.DEBUG in the standard
# library; presumably get_logger interprets `level` the same way -- confirm.
logger = get_logger(level=10)
import duckdb
from providers.google_cloud_storage_provider import GoogleCloudStorageProvider
from providers.duck_db_provider import DuckDBProvider
def example_init_google_cloud_storage():
    """Upload a small sample CSV to GCS, then log the bucket listing.

    A ``GoogleCloudStorageProvider`` is created from the service-account key
    file, the sample data is written as a ``text/csv`` object, and whatever
    ``get_buckets()`` returns is logged.
    """
    logger.info("Starting example_google_cloud_storage function.")

    storage = GoogleCloudStorageProvider(
        cred_path="./keys/google_service_accout.json",
    )

    # Sample data set: one header line followed by five records.
    sample_csv = """id,name,age,city,score,created_at
1,Alice,25,Tokyo,88,2025-10-01T09:00:00Z
2,Bob,30,Osaka,75,2025-10-02T09:30:00Z
3,Charlie,28,Nagoya,92,2025-10-03T10:00:00Z
4,David,35,Fukuoka,64,2025-10-04T11:15:00Z
5,Eva,22,Sapporo,80,2025-10-05T12:45:00Z
"""
    target_bucket = "datasource-example-251018"
    target_object = "example/y=2025/m=10/example.csv"
    payload = sample_csv.encode("utf-8")
    storage.write_item(target_bucket, target_object, payload, "text/csv")

    bucket_listing = storage.get_buckets()
    logger.info(f"Buckets: {bucket_listing}")
def example_duckdb_cloud_raw():
    """Read the sample CSV from GCS through a raw DuckDB connection.

    Registers a DuckDB ``gcs`` secret from the HMAC key pair held in the
    ``GCP_STORAGE_HMAC_ACCESS_KEY`` / ``GCP_STORAGE_HMAC_SECRET_KEY``
    environment variables, queries the CSV object over ``gs://`` and logs the
    number of rows read.

    Raises:
        RuntimeError: If either HMAC environment variable is unset or empty.
    """
    logger.info("Starting example_duckdb_cloud_raw function.")

    access_key = os.getenv('GCP_STORAGE_HMAC_ACCESS_KEY')
    secret_key = os.getenv('GCP_STORAGE_HMAC_SECRET_KEY')
    if not access_key or not secret_key:
        # Fail early with a clear message instead of interpolating the text
        # "None" into the CREATE SECRET statement.
        raise RuntimeError(
            "GCP_STORAGE_HMAC_ACCESS_KEY and GCP_STORAGE_HMAC_SECRET_KEY "
            "must both be set to use the GCS example."
        )

    def _sql_literal(value):
        # Render the value as a single-quoted SQL string literal. HMAC secrets
        # may contain characters such as '+' or '/', which would otherwise be
        # parsed as SQL operators.
        return "'" + value.replace("'", "''") + "'"

    # Open an in-memory DuckDB connection.
    con = duckdb.connect()
    try:
        con.sql(f"""
            CREATE OR REPLACE SECRET gcs_creds (
                TYPE gcs,
                KEY_ID {_sql_literal(access_key)},
                SECRET {_sql_literal(secret_key)}
            );
        """)
        query = """
            SELECT * FROM read_csv_auto('gs://datasource-example-251018/example/y=2025/m=10/example.csv');
        """
        # .df() materializes the result, so it remains usable after close().
        result = con.execute(query).df()
    finally:
        # Always release the connection, even when the query fails.
        con.close()
    logger.info(f"Read {len(result)} rows from GCS file.")
def example_duckdb_cloud_class():
    """Read the sample CSV objects from GCS via ``DuckDBProvider``.

    The provider is configured with the HMAC key pair taken from the
    ``GCP_STORAGE_HMAC_ACCESS_KEY`` / ``GCP_STORAGE_HMAC_SECRET_KEY``
    environment variables. The object name contains a ``*`` wildcard in the
    ``m=`` path segment. The number of rows returned is logged.
    """
    logger.info("Starting example_duckdb_cloud_class function.")

    # Create the DuckDB provider and hand it the GCS credentials.
    duck = DuckDBProvider()
    hmac_access = os.getenv('GCP_STORAGE_HMAC_ACCESS_KEY')
    hmac_secret = os.getenv('GCP_STORAGE_HMAC_SECRET_KEY')
    duck.setup_gcs(access_key=hmac_access, secret_key=hmac_secret)

    # Let the provider build the FROM-clause source for the gs:// location.
    csv_source = duck.get_gs_csv_name(
        "datasource-example-251018",
        "example/y=2025/m=*/example.csv",
    )
    sql = f"""
SELECT * FROM {csv_source};
"""
    frame = duck.query_df(sql)
    row_count = len(frame)
    logger.info(f"Read {row_count} rows from GCS file using DuckDBProvider.")
# Example selector: uncomment the call(s) you want to run.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so the active
# call below also runs whenever this module is imported.
# example_init_google_cloud_storage()
# example_duckdb_cloud_raw()
example_duckdb_cloud_class()