チャンク処理

2025-09-10 21:52:57 +09:00 · 2025-09-10 21:52:57 +09:00 · 1e808cb472
commit 1e808cb472
parent ea17845fe1
26 changed files with 932 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,5 @@
+.output/
+
 # ---> Python
 # Byte-compiled / optimized / DLL files
 __pycache__/
@ -15,7 +17,6 @@ dist/
 downloads/
 eggs/
 .eggs/
-lib/
 lib64/
 parts/
 sdist/
--- a/README.md
+++ b/README.md
@ -1,2 +1,18 @@
 # speech-to-text-pipeline

+```bat
+python src\main.py data\samples\task-smp.mp3
+```
+
+
+## セットアップ方法
+
+**前提条件**
+
+システムに `ffmpeg` がインストールされている必要があります。
+
+```text
+- macOS: `brew install ffmpeg`
+- Ubuntu: `sudo apt install ffmpeg`
+- Windows: `choco install ffmpeg`(または手動でPATHを通す)
+```
--- a/data/samples/interview_aps-smp.mp3
+++ b/data/samples/interview_aps-smp.mp3
--- a/data/samples/interview_aps-smp.txt
+++ b/data/samples/interview_aps-smp.txt
@ -0,0 +1,168 @@
+%講演ID:D04M0041
+%
+%<SOT>
+%%【略】
+0003 00008.805-00012.085 L:
+質問させていただきます                        & シツモンサセテイタダキマス                   
+(F あの)                                      & (F アノ)                                     
+読んだんですけれども                          & ヨンダンデスケレドモ                         
+0004 00009.417-00009.838 R:
+(F うん)                                      & (F <VN>)                                     
+0005 00011.770-00012.901 R:
+(F うん)                                      & (F <VN>)                                     
+分からなかった                                & ワカラナカッタ                               
+0006 00012.536-00013.221 L:
+大抵の                                        & タイ(笑 テーノ)                              
+0007 00013.250-00014.315 R:<笑>
+0008 00013.698-00016.817 L:
+(F あのー)                                    & (F アノー)                                   
+理解には                                      & リカイニワ                                   
+遠く                                          & トーク                                       
+及ばずという                                  & オヨバズトユー                               
+感じで                                        & カンジデ                                     
+0009 00017.156-00018.411 L:
+(F あのー)                                    & (F アノー)                                   
+言葉の                                        & コトバノ                                     
+意味                                          & イミ                                         
+0010 00018.861-00020.915 L:
+から                                          & カラ                                         
+お聞きしたいと                                & オキキシタイト                               
+思うんですけど                                & オモウンデスケド                             
+0011 00019.775-00020.137 R:
+(F はい)(F はい)                              & (F (? ハ)イ)(F ハイ)                         
+0012 00020.603-00020.823 R:
+(F うん)                                      & (F <VN>)                                     
+0013 00021.484-00022.802 L:
+パラ言語情報                                  & パラゲンゴジョーホー                         
+0014 00023.223-00029.484 L:
+っていう                                      & ッテ(W ユ;ユウ)                              
+言葉と                                        & コトバト                                     
+後                                            & アト                                         
+ホルマント                                    & (W フォルマント;ホルマント)                  
+後                                            & アト                                         
+調音運動っていう                              & チョーオ(? ン)ウンドーッテユー               
+ことについて                                  & コトニツイテ                                 
+まず                                          & マズ                                         
+初めに                                        & ハジメニ                                     
+聞かせてください                              & キカセテクダサイ                             
+0015 00029.360-00030.235 R:
+三つね                                        & ミッツネ<H>                                  
+0016 00030.120-00030.355 L:
+(F はい)                                      & (F ハイ)                                     
+0017 00030.360-00031.091 R:
+(F あのー)                                    & (F アノー)                                   
+0018 00030.469-00030.950 L:<笑>
+0019 00031.758-00034.406 R:
+言語ってのは                                  & ゲンゴッテノワ                               
+分かりますよね                                & ワカリマスヨネ<H>                            
+言葉ですよね                                  & コトバデスヨネ                               
+0020 00034.287-00034.610 L:
+(F はい)                                      & (F ハイ)                                     
+0021 00034.608-00035.659 R:
+そんで                                        & (W ウン;ソン)デ                              
+言語情報                                      & ゲンゴジョーホー                             
+0022 00035.863-00038.959 R:
+っていうのはね                                & (? ッテユー)ノワネ<H>                        
+(F まー)                                      & (F マー)                                     
+簡単に                                        & カンタンニ                                   
+言えば                                        & イエバ                                       
+単語の                                        & タンゴノ                                     
+意味                                          & イミ                                         
+0023 00039.234-00039.547 L:
+(F はい)                                      & (F ハイ)                                     
+0024 00040.215-00044.337 R:
+(F あーのー)                                  & (F アーノー)                                 
+辞書に                                        & ジショニ                                     
+書いてありますよね                            & カイテアリマスヨネ<H>                        
+それから                                      & (W ソエ;ソレ)カラ                            
+(F その)                                      & (F ソノ)                                     
+単語が                                        & タンゴガ                                     
+くっ付いた                                    & クッツイタ                                   
+時に                                          & トキニ<H>                                    
+0025 00042.086-00042.410 L:
+(F うん)                                      & (F <VN>)                                     
+0026 00045.143-00046.896 R:
+(D (? つ))                                    & (D (? ツ))                                   
+くっ付いて                                    & クッツイテ                                   
+ほら                                          & ホラ                                         
+文を                                          & ブンオ                                       
+作ったり                                      & ツクッタリ                                   
+0027 00047.033-00047.309 L:
+(F はい)                                      & (F ハイ)                                     
+0028 00047.107-00050.050 R:
+する                                          & スル                                         
+時に                                          & トキニ<H>                                    
+助詞が                                        & ジョシ(? ガ)                                 
+名詞に                                        & メーシニ                                     
+助詞が                                        & ジョシガ                                     
+くっ付いて                                    & クッツイテ<H>                                
+0029 00050.267-00051.283 R:
+動詞が                                        & ドーシガ                                     
+あって                                        & アッテ                                       
+0030 00051.534-00052.914 R:
+最後に                                        & サイゴニ                                     
+助動詞が                                      & ジョドーシガ                                 
+あって                                        & アッテ                                       
+0031 00053.116-00053.973 R:
+ってのは                                      & ッテノワ                                     
+(D す)                                        & (D ス)                                       
+(F まー)                                      & (F マー)                                     
+そういう                                      & ソーユー                                     
+0032 00054.237-00054.812 R:
+普通に                                        & フツーニ                                     
+0033 00055.349-00058.636 R:
+言語学の                                      & ゲンゴガクノ                                 
+教科書に                                      & キョーカショニ                               
+書いてあるような                              & カイテアルヨー(W (? ン);ナ)                  
+それが                                        & ソレガ                                       
+(F まー)                                      & (F マー)                                     
+言語情報ですね                                & ゲンゴジョーホーデスネ                       
+0034 00056.998-00057.373 L:
+(F はい)                                      & (F ハイ)                                     
+0035 00059.323-00060.808 R:
+で                                            & デ                                           
+パラって                                      & パラッテ                                     
+言葉はね                                      & コトバワネ<H>                                
+0036 00061.138-00062.393 R:
+(?)語源的にはね                               & (?)ゴゲンテキニワネ<H>                       
+0037 00062.773-00063.557 R:
+(F そのー)                                    & (F ソノー)                                   
+0038 00064.159-00066.039 R:
+何とかの                                      & ナントカノ                                   
+横にとかね                                    & ヨコニ<Q>トカネ                              
+0039 00066.163-00066.903 L:
+(F はー)                                      & (F ハー)                                     
+0040 00066.599-00067.278 R:
+隣りに                                        & トナリニ                                     
+0041 00067.524-00067.898 R:
+とかね                                        & トカネ                                       
+0042 00068.271-00069.537 R:
+そういう                                      & ソーユー                                     
+意味なんですよ                                & イミナンデスヨ                               
+0043 00069.548-00069.944 L:
+(F うん)                                      & (F <VN>)                                     
+0044 00071.148-00074.140 R:
+で                                            & デ<H>                                        
+つまり                                        & ツマリ                                       
+言語から                                      & ゲンゴカラ                                   
+ちょっと                                      & チョット                                     
+ずれたとこに                                  & ズレタトコニ                                 
+ある                                          & アル                                         
+0045 00074.125-00074.514 L:
+(F はい)                                      & (F ハイ)                                     
+0046 00074.349-00074.835 R:
+情報                                          & ジョーホー                                   
+0047 00075.089-00075.690 L:
+(F ふーん)                                    & (F <VN>)                                     
+0048 00075.201-00075.726 R:
+だけど                                        & ダケド                                       
+0049 00076.286-00076.757 R:
+だけど                                        & ダケド                                       
+0050 00077.039-00079.949 R:
+(F そのー)                                    & (F ソノー)                                   
+実際には                                      & ジッサイニワ                                 
+存在してる                                    & ソンザイシテル                               
+情報っていう                                  & ジョーホー(? ッテユー)                       
+ことですね                                    & コトデスネ                                   
+%%【略】
+%<EOT>
--- a/data/samples/task-smp.mp3
+++ b/data/samples/task-smp.mp3
--- a/data/samples/task-smp.txt
+++ b/data/samples/task-smp.txt
@ -0,0 +1,121 @@
+%講演ID:D02M0016
+%
+%<SOT>
+0001 00000.314-00000.946 L:
+開けます                                      & アケマス                                     
+0002 00001.020-00001.420 R:
+(F はい)                                      & (F ハ<H>イ)                                  
+0003 00003.253-00003.612 R:
+(F あー)                                      & (F アー)                                     
+0004 00003.366-00003.670 R:<雑音>
+0005 00003.971-00005.436 R:
+僕の                                          & ボクノ                                       
+方には                                        & ホーニワ                                     
+写真が                                        & シャシンガ                                   
+入ってる                                      & ハイッテル                                   
+0006 00006.080-00011.018 L:
+(F あ)                                        & (F ア)                                       
+あたしの                                      & アタシノ                                     
+方は                                          & ホーワ<H>                                    
+(F あの)                                      & (F アノ)                                     
+名前と                                        & ナマエト                                     
+(D けー)                                      & (D ケー)                                     
+一言っていうのが                              & ヒトコトッテユーノガ                         
+入ってます                                    & ハイッテマス                                 
+(F はい)                                      & (F ハイ)                                     
+0007 00010.192-00010.473 R:
+(F あー)                                      & (F アー)                                     
+そう                                          & ソー                                         
+0008 00010.980-00011.187 R:
+(F ん)                                        & (F (? ン))                                   
+0009 00011.951-00014.537 R:
+知らない                                      & シ<H>ラナイ                                  
+人が                                          & ヒトガ                                       
+いっぱい                                      & (笑 イッパイ                                 
+いるぞ                                        & イルゾ<H>)<笑>                               
+0010 00014.391-00015.204 L:
+(F え)                                        & (F エ)                                       
+そうですか                                    & ソーデスカ<H>                                
+0011 00015.182-00015.785 R:
+(F うーん)                                    & (F <VN>)                                     
+0012 00015.866-00018.167 L:
+名前                                          & ナマエ                                       
+じゃ                                          & ジャ<H>                                      
+ちょっと                                      & チョット                                     
+読み上げてくんで                              & ヨミアゲテクンデ<H>                          
+0013 00017.339-00017.827 R:
+(F はい)(F はい)                              & (F ハイ)(F ハイ)                             
+0014 00018.111-00018.436 R:
+(F うん)                                      & (F <VN>)                                     
+0015 00018.690-00019.721 L:
+行きます                                      & イキマス                                     
+上から                                        & ウエカラ                                     
+0016 00019.313-00019.554 R:
+(F はい)                                      & (F ハイ)                                     
+0017 00019.934-00020.263 R:
+(F うん)                                      & (F <VN>)                                     
+0018 00019.961-00020.752 L:
+平野レミ                                      & ヒラノレミ                                   
+0019 00021.406-00022.671 R:
+平野レミ                                      & ヒラノレミ                                   
+0020 00022.308-00028.617 L:
+平野レミ                                      & ヒラノレミ                                   
+(F あの)                                      & (F アノ)                                     
+お料理                                        & オリョーリ<H>                                
+お料理                                        & オリョーリ                                   
+(F あのー)                                    & (F アノー)                                   
+お料理研究家の                                & オリョーリケンキューカノ                     
+人                                            & (W シト;ヒト)                                
+(F はい)                                      & (F ハイ)                                     
+眼鏡                                          & メガネ                                       
+掛けてて                                      & カケテテ                                     
+ショートカット                                & ショートカット                               
+0021 00025.914-00027.060 R:
+女の                                          & オンナノ                                     
+人だよね                                      & ヒトダヨネ                                   
+0022 00027.805-00029.129 R:
+眼鏡                                          & メガネ                                       
+掛けた                                        & (? カ)ケタ                                   
+(F あー)                                      & (F アー)                                     
+分かった                                      & ワカッタ                                     
+分かった                                      & ワカッタ                                     
+0023 00028.159-00028.678 R:<雑音>
+0024 00028.822-00029.086 L:
+(F はい)                                      & (F ハイ)                                     
+0025 00029.318-00029.520 R:
+(F はい)                                      & (F (W アイ;ハイ))                            
+0026 00030.031-00030.914 L:
+セルジオ                                      & セルジオ<H>                                  
+0027 00030.138-00031.642 R:
+ちょっと                                      & (W チョト;チョット)                          
+待ってね                                      & マッテネ                                     
+平野                                          & ヒラノ                                       
+0028 00031.314-00031.582 L:
+(F はい)                                      & (F ハイ)                                     
+0029 00031.939-00033.269 R:
+平らな                                        & タイラナ                                     
+野っ原ですか                                  & ノッパラデスカ                               
+0030 00033.130-00034.861 L:
+(F はい)                                      & (F ハイ)                                     
+レミは                                        & レミワ                                       
+片仮名です                                    & カタカナデス                                 
+0031 00033.577-00033.830 R:
+(D (? ん))                                    & (D (? ン))                                   
+0032 00035.022-00035.267 R:
+(F はい)                                      & (F ハイ)                                     
+0033 00036.548-00036.863 R:
+それから                                      & (? ソ)レカラ                                 
+0034 00036.998-00038.020 L:
+セルジオ越後                                  & セルジオエチゴ                               
+0035 00038.760-00040.562 R:
+これ                                          & コレ                                         
+分かんない                                    & ワカンナイ                                   
+セルジオ越後って                              & (笑 セルジ(? オ)エチゴッテ)                  
+誰                                            & ダレ                                         
+0036 00040.110-00043.657 L:
+これは                                        & コレワ<H>                                    
+(F あのー)                                    & (F アノー)                                   
+サッカーの                                    & サッカーノ<H>                                
+解説者なんですけど                            & カイセツシャナンデスケド<H>                  
+%%【略】
+%<EOT>
--- a/docs/outline.md
+++ b/docs/outline.md
@ -0,0 +1,39 @@
+# speech-to-text-pipeline
+
+音声データをテキストデータへ変換し最終成果物を出力する
+
+## ユースケース
+
+* 会議録の音声メモを入力して会議録を作成する
+* 音声データからアクションプランや課題を抽出して、レポートを出力する
+
+## 機能
+
+* 音声フォーマット統一
+* チャンク処理
+* 音声前処理(ノイズ除去)
+* 音声強調
+* 話者分別機能(VAD)
+* 文字起こし
+* テキストファイル出力
+* 精度評価
+* 精度結果出力
+  
+
+
+## 実現する技術
+
+**インフラ**
+
+* GPU処理
+  * Modal
+* パイプライン
+  * 検討中
+
+
+
+## 仕様
+
+### 音声フォーマット統一
+
+* Whisper系とgpt-4o-transcribeのどちらも、mono / 16kHz / PCM16 が最適となる
--- a/examples/example_main.py
+++ b/examples/example_main.py
@ -0,0 +1,11 @@
+import sys
+import os
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))
+
+from dotenv import load_dotenv
+load_dotenv(".env")
+
+
+def example():
+    """Placeholder entry point for the usage example.
+
+    The definition previously had no body, which is a SyntaxError as soon
+    as the module is imported. This stub keeps the file importable until
+    the real example is written.
+    """
+    # TODO: call into the pipeline once the example scenario is decided.
+    return None
+
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1 @@
+librosa
--- a/src/app.py
+++ b/src/app.py
@ -0,0 +1,17 @@
+
+from lib.custom_logger import get_logger
+from app_status import AppStatus
+from pipeline.app_pipeline import AppPipeline
+
+def app_start():
+    """Start the application and run the processing pipeline.
+
+    Logs start-up, assigns a request ID on the AppStatus singleton, logs
+    the currently configured input file path, then builds an AppPipeline
+    and runs it.
+    """
+    log = get_logger()
+    log.info("Application started")
+
+    state = AppStatus()
+    # The request ID is hard-coded in this revision.
+    state.request_id = "6cb2da8f-ffde-4af6-9c25-19513da40b2c"
+    log.info(f"Input file path: {state.input_filepath}")
+
+    AppPipeline().run()
+
+
+
--- a/src/app_file_storage.py
+++ b/src/app_file_storage.py
@ -0,0 +1,68 @@
+import os
+import shutil
+from app_status import AppStatus
+from lib.custom_logger import get_logger
+
+logger = get_logger()
+
+class AppFileStorage:
+    """File-system helpers that operate on the paths held by AppStatus."""
+
+    @classmethod
+    def create_output_dir(cls) -> str:
+        """Create the per-request output directory and return its path.
+
+        Returns:
+            The directory path ``<output_base_dir>/<request_id>``.
+
+        Raises:
+            ValueError: If no request ID is set in AppStatus.
+        """
+        app_status = AppStatus()
+        base_dir = app_status.output_base_dir
+        request_id = app_status.request_id
+        if not request_id:
+            raise ValueError("Request ID is not set in AppStatus")
+        output_dir = f"{base_dir}/{request_id}"
+        try:
+            os.makedirs(output_dir, exist_ok=True)
+        except Exception as e:
+            logger.error(f"Error creating output directory: {e}")
+            raise
+        logger.info(f"Output directory created at: {output_dir}")
+        # Previously the method fell off the end and returned None even
+        # though it is annotated and documented as returning the path.
+        return output_dir
+
+    @classmethod
+    def _resolve_source_paths(cls):
+        """Return ``(input file path, source directory)`` after validating both.
+
+        Raises:
+            ValueError: If the input file path or the source directory is
+                not available from AppStatus.
+        """
+        app_status = AppStatus()
+        source_file = app_status.input_filepath
+        destination_dir = app_status.source_dir
+        # Fail early with a clear message instead of letting
+        # os.path.basename(None) raise an opaque TypeError later.
+        if not source_file:
+            raise ValueError("Input file path is not set in AppStatus")
+        if not destination_dir:
+            raise ValueError("Source directory is not set in AppStatus")
+        return source_file, destination_dir
+
+    @classmethod
+    def copy_to_source(cls) -> str:
+        """Copy the input file into the source directory.
+
+        The source directory is created when missing. On success the
+        destination path is stored in ``AppStatus.source_file``.
+
+        Returns:
+            The full path of the copied file.
+
+        Raises:
+            ValueError: If the input file path or source directory is unset.
+            Exception: Any error from directory creation or the copy is
+                logged and re-raised unchanged.
+        """
+        source_file, destination_dir = cls._resolve_source_paths()
+        try:
+            os.makedirs(destination_dir, exist_ok=True)
+            # Keep the original file name inside the source directory.
+            filename = os.path.basename(source_file)
+            destination_file = os.path.join(destination_dir, filename)
+            # copy2 also copies file metadata where the platform allows.
+            shutil.copy2(source_file, destination_file)
+        except Exception as e:
+            logger.error(f"Error copying file to source directory: {e}")
+            raise
+        logger.info(f"File copied to source directory: {destination_file}")
+        AppStatus().source_file = destination_file
+        return destination_file
+
+    @classmethod
+    def set_source_file(cls) -> str:
+        """Record the source-file path in AppStatus without copying.
+
+        Computes where the input file lives inside the source directory
+        (creating the directory when missing) and stores that path in
+        ``AppStatus.source_file``.
+
+        Returns:
+            The full path of the file inside the source directory.
+
+        Raises:
+            ValueError: If the input file path or source directory is unset.
+            Exception: Any error from directory creation is logged and
+                re-raised unchanged.
+        """
+        source_file, destination_dir = cls._resolve_source_paths()
+        try:
+            os.makedirs(destination_dir, exist_ok=True)
+            filename = os.path.basename(source_file)
+            destination_file = os.path.join(destination_dir, filename)
+        except Exception as e:
+            # Nothing is copied here, so the message no longer says "copying".
+            logger.error(f"Error setting source file: {e}")
+            raise
+        AppStatus().source_file = destination_file
+        return destination_file
--- a/src/app_status.py
+++ b/src/app_status.py
@ -0,0 +1,85 @@
+from lib.singleton import Singleton
+
+class AppStatus(Singleton):
+    """Singleton that holds the application's state in a key/value dict."""
+
+    def __init__(self):
+        # __init__ may run again for the same instance; keep the first state.
+        if getattr(self, "_initialized", False):
+            return
+        self.status = {}
+        self._initialized = True
+
+    def reset(self):
+        """Remove every stored status entry."""
+        self.status.clear()
+
+    def set_status(self, key, value):
+        """Store ``value`` under ``key``."""
+        self.status[key] = value
+
+    def get_status(self, key, default=None):
+        """Return the value stored under ``key``, or ``default`` if absent."""
+        return self.status.get(key, default)
+
+    # --- stored values -------------------------------------------------
+
+    @property
+    def input_filepath(self) -> str:
+        """Path of the input audio file (None until set)."""
+        return self.get_status("input_filepath")
+
+    @input_filepath.setter
+    def input_filepath(self, value: str):
+        self.set_status("input_filepath", value)
+
+    @property
+    def request_id(self) -> str:
+        """Request ID (None until set)."""
+        return self.get_status("request_id")
+
+    @request_id.setter
+    def request_id(self, value: str):
+        self.set_status("request_id", value)
+
+    @property
+    def output_base_dir(self) -> str:
+        """Base path for output directories; defaults to ``.output``."""
+        return self.get_status("output_base_dir", default=".output")
+
+    @output_base_dir.setter
+    def output_base_dir(self, value: str):
+        self.set_status("output_base_dir", value)
+
+    @property
+    def source_file(self) -> str:
+        """Path of the source file (None until set)."""
+        return self.get_status("source_file")
+
+    @source_file.setter
+    def source_file(self, value: str):
+        self.set_status("source_file", value)
+
+    # --- derived paths -------------------------------------------------
+
+    @property
+    def output_dir(self) -> str:
+        """Output directory: ``<output_base_dir>/<request_id>``."""
+        return f"{self.output_base_dir}/{self.request_id}"
+
+    @property
+    def source_dir(self) -> str:
+        """Source directory: ``<output_dir>/source``."""
+        return f"{self.output_dir}/source"
+
+    @property
+    def chunk_dir(self) -> str:
+        """Chunk directory: ``<output_dir>/chunk``."""
+        return f"{self.output_dir}/chunk"
+
+    @property
+    def unified_file(self) -> str:
+        """Path of the unified audio file: ``<output_dir>/unified.wav``."""
+        return f"{self.output_dir}/unified.wav"
--- a/src/jobs/init.py
+++ b/src/jobs/init.py
--- a/src/jobs/job_base.py
+++ b/src/jobs/job_base.py
@ -0,0 +1,16 @@
+from lib.custom_logger import get_logger
+from app_status import AppStatus
+
+class JobBase:
+    """Base class for pipeline jobs."""
+
+    def __init__(self, name="JobBase"):
+        """Store the job name, a logger and the AppStatus instance.
+
+        Args:
+            name: Name used in this job's log messages.
+        """
+        self.name = name
+        self.logger = get_logger()
+        self.status = AppStatus()
+        self.logger.info(f"{self.name} initialized")
+
+    def execute(self):
+        """Run the job; subclasses are expected to override this.
+
+        Raises:
+            NotImplementedError: Always, after logging the call.
+        """
+        self.logger.info(f"{self.name} execute called")
+        raise NotImplementedError("Subclasses must implement this method")
--- a/src/jobs/job_chunk_files.py
+++ b/src/jobs/job_chunk_files.py
@ -0,0 +1,72 @@
+import os
+from jobs.job_base import JobBase
+
+class JobChunkFiles(JobBase):
+    """Job that splits the unified audio file into overlapping chunks."""
+
+    def __init__(self):
+        super().__init__(name=self.__class__.__name__)
+        self.description = "Chunk Audio Files Job"
+
+    def _chunk_ffmpeg(self, src, dst, segment_time: int = 1200, overlap: int = 2):
+        """Cut ``src`` into overlapping WAV chunks under ``dst`` with ffmpeg.
+
+        Chunks are named ``000000.wav``, ``000001.wav``, ... Each chunk
+        starts ``segment_time - overlap`` seconds after the previous one,
+        so neighbouring chunks share ``overlap`` seconds of audio. A
+        manifest describing every chunk is written as JSON Lines to
+        ``chunks.manifest.jsonl`` in the parent directory of ``dst``.
+
+        Args:
+            src: Path of the audio file to split.
+            dst: Directory that receives the chunk files (created if missing).
+            segment_time: Maximum chunk length in seconds.
+            overlap: Seconds shared between consecutive chunks.
+
+        Returns:
+            A list of dicts with the keys ``chunk_id``, ``abs_start``,
+            ``abs_end``, ``overlap_right`` and ``path``.
+
+        Raises:
+            ValueError: If ``overlap`` is not smaller than ``segment_time``.
+            subprocess.CalledProcessError: If ffprobe or ffmpeg exits non-zero.
+        """
+        import json
+        import pathlib
+        import subprocess
+
+        # Start of next chunk = start of previous chunk + step.
+        step = segment_time - overlap
+        if step <= 0:
+            raise ValueError("overlap は segment_time より小さくしてください")
+
+        out = pathlib.Path(dst)
+        out.mkdir(parents=True, exist_ok=True)
+
+        # Total duration in seconds, read with ffprobe.
+        dur = float(subprocess.check_output(
+            ["ffprobe", "-v", "error", "-show_entries", "format=duration",
+             "-of", "default=noprint_wrappers=1:nokey=1", src],
+            text=True).strip())
+
+        manifest = []
+        index = 0
+        start = 0.0
+        while start < dur:
+            end = min(start + segment_time, dur)
+            is_last = end >= dur
+            chunk_path = out / f"{index:06d}.wav"
+            # -ss/-to are placed after -i for a precise cut.
+            cmd = [
+                "ffmpeg", "-y",
+                "-i", src,
+                "-ss", f"{start:.3f}",
+                "-to", f"{end:.3f}",
+                "-c", "copy",   # copy is fine for WAV; re-encode compressed sources
+                str(chunk_path),
+            ]
+            subprocess.run(cmd, check=True)
+
+            manifest.append({
+                "chunk_id": f"{index:06d}",
+                "abs_start": round(start, 3),
+                "abs_end": round(end, 3),
+                "overlap_right": 0.0 if is_last else overlap,
+                "path": str(chunk_path),
+            })
+            # Stop once a chunk reaches the end of the audio. Advancing by
+            # `step` here would emit one more chunk holding only the
+            # overlap tail that this chunk already contains.
+            if is_last:
+                break
+            index += 1
+            start += step
+
+        # Save the manifest; later stages use it to restore absolute times.
+        (out.parent / "chunks.manifest.jsonl").write_text(
+            "\n".join(json.dumps(m, ensure_ascii=False) for m in manifest),
+            encoding="utf-8"
+        )
+        return manifest
+
+    def execute(self):
+        """Chunk the unified audio file unless the chunk directory exists."""
+        self.logger.info(f"{self.name} execute started")
+
+        chunk_dir = self.status.chunk_dir
+        if os.path.exists(chunk_dir):
+            # An existing chunk directory is treated as "already chunked".
+            self.logger.info(f"Audio already chunked: {chunk_dir}")
+            return
+
+        self._chunk_ffmpeg(self.status.unified_file, chunk_dir)
+        return
--- a/src/jobs/job_get_request_id.py
+++ b/src/jobs/job_get_request_id.py
@ -0,0 +1,24 @@
+import uuid
+
+from jobs.job_base import JobBase
+from app_file_storage import AppFileStorage
+
+class JobGetRequestId(JobBase):
+    """Job that makes sure a request ID is present in AppStatus."""
+
+    def __init__(self):
+        super().__init__(name=type(self).__name__)
+        self.description = "Get Request ID Job"
+
+    def execute(self):
+        """Generate a request ID unless one is already set.
+
+        When a new ID is generated it is stored in AppStatus and the
+        output directory is created through AppFileStorage. When an ID
+        already exists nothing else is done.
+        """
+        self.logger.info(f"{self.name} execute started")
+
+        existing_id = self.status.request_id
+        if existing_id:
+            self.logger.info(f"Request ID already set: {existing_id}")
+            return
+
+        new_id = str(uuid.uuid4())
+        self.status.request_id = new_id
+        self.logger.info(f"Obtained request ID: {new_id}")
+
+        # Create the output directory for this request ID.
+        AppFileStorage.create_output_dir()
--- a/src/jobs/job_set_soruce_file.py
+++ b/src/jobs/job_set_soruce_file.py
@ -0,0 +1,23 @@
+import os
+import uuid
+
+import app_status
+from jobs.job_base import JobBase
+from app_file_storage import AppFileStorage
+
+class JobSetSourceFile(JobBase):
+    """Job that sets the source file in AppStatus."""
+
+    def __init__(self):
+        super().__init__(name=type(self).__name__)
+        self.description = "Set Source File Job"
+
+    def execute(self):
+        """Copy the input into the source directory, or reuse an existing one.
+
+        If the source directory does not exist yet, the input file is
+        copied into it. Otherwise only the source-file path is recorded.
+        """
+        self.logger.info(f"{self.name} execute started")
+
+        source_dir = self.status.source_dir
+        if not os.path.exists(source_dir):
+            self.logger.info("Source directory is not set")
+            # Copy the source file.
+            AppFileStorage.copy_to_source()
+            return
+
+        # The output already contains a source folder.
+        self.logger.info(f"Source directory already set: {source_dir}")
+        AppFileStorage.set_source_file()
--- a/src/jobs/job_standardize_format.py
+++ b/src/jobs/job_standardize_format.py
@ -0,0 +1,28 @@
+import os
+from jobs.job_base import JobBase
+
+class JobStandardizeFormat(JobBase):
+    """Job that converts the source audio into the pipeline's standard format.
+
+    The conversion is delegated to the ``ffmpeg`` command-line tool and
+    produces mono, 16 kHz, signed 16-bit PCM audio (intended to be WAV; the
+    container is chosen by ffmpeg from the destination file name).
+    """
+
+    def __init__(self):
+        super().__init__(name=self.__class__.__name__)
+        self.description = "Standardize Audio Format Job"
+
+    def _convert_ffmpeg(self, src, dst):
+        """Convert ``src`` to mono / 16 kHz / PCM16 audio stored at ``dst``.
+
+        ffmpeg writes to a temporary sibling of ``dst`` which is renamed onto
+        ``dst`` only after ffmpeg succeeded. ``execute`` treats the mere
+        existence of ``dst`` as "already converted", so a half-written file
+        from a failed or interrupted run must never appear under that name.
+
+        Args:
+            src: Path of the input audio file.
+            dst: Path of the converted output file; parent directories are
+                created when missing.
+
+        Raises:
+            subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
+            OSError: ffmpeg could not be started or the rename failed.
+        """
+        import pathlib
+        import subprocess
+
+        target = pathlib.Path(dst)
+        target.parent.mkdir(parents=True, exist_ok=True)
+        # Keep the real extension last so ffmpeg still infers the output
+        # container from the file name.
+        partial = target.with_name(f"{target.stem}.partial{target.suffix}")
+        cmd = [
+            "ffmpeg", "-y", "-i", src,
+            "-ac", "1", "-ar", "16000", "-c:a", "pcm_s16le",
+            str(partial),
+        ]
+        try:
+            subprocess.run(cmd, check=True)
+            os.replace(partial, target)
+        finally:
+            # After a successful rename the temporary file is gone; after a
+            # failure this removes whatever ffmpeg left behind.
+            if partial.exists():
+                partial.unlink()
+
+    def execute(self):
+        """Convert the source file unless the unified file already exists."""
+        self.logger.info(f"{self.name} execute started")
+
+        if os.path.exists(self.status.unified_file):
+            # Already converted.
+            self.logger.info(f"Audio already standardized: {self.status.unified_file}")
+            return
+
+        src = self.status.source_file
+        dst = self.status.unified_file
+        # Format conversion (WAV mono / 16 kHz / PCM16).
+        self._convert_ffmpeg(src, dst)
+        return
--- a/src/jobs/job_visualize_audio.py
+++ b/src/jobs/job_visualize_audio.py
@ -0,0 +1,82 @@
+import os
+import json
+from pathlib import Path
+
+import librosa
+import librosa.display
+import matplotlib.pyplot as plt
+import numpy as np
+
+from jobs.job_base import JobBase
+from app_file_storage import AppFileStorage
+
+
+class JobVisualizeAudio(JobBase):
+    """Job that renders the waveform and spectrogram of an audio file (CPU).
+
+    Two PNG images and a small JSON summary are written to the output
+    directory. The summary is written last, so its presence marks a
+    completed visualization.
+    """
+
+    # Name of the summary file; also used as the "visualization finished" marker.
+    SUMMARY_FILENAME = "visualize_summary.json"
+
+    def __init__(self):
+        super().__init__(name=self.__class__.__name__)
+        self.description = "Visualize Audio (waveform & spectrogram)"
+
+    def get_visualization(self, audio_path: str, out_dir: str,
+                n_fft: int = 1024, hop_length: int = 256):
+        """Write ``waveform.png``, ``spectrogram.png`` and a JSON summary.
+
+        Args:
+            audio_path: Audio file to load.
+            out_dir: Directory receiving the outputs; created when missing.
+            n_fft: FFT window size passed to ``librosa.stft``.
+            hop_length: Hop length passed to ``librosa.stft`` and to the
+                spectrogram display.
+        """
+        self.logger.info(f"{self.name} started: {audio_path}")
+        out = Path(out_dir)
+        out.mkdir(parents=True, exist_ok=True)
+
+        # 1) Load the audio, keeping its native sample rate (sr=None), as mono.
+        y, sr = librosa.load(audio_path, sr=None, mono=True)
+        dur = y.shape[0] / sr
+        self.logger.info(f"loaded: sr={sr}, dur={dur:.2f}s, samples={len(y)}")
+
+        # 2) Waveform
+        plt.figure(figsize=(12, 3))
+        librosa.display.waveshow(y, sr=sr)
+        plt.title("Waveform")
+        plt.xlabel("Time (s)")
+        plt.ylabel("Amplitude")
+        plt.tight_layout()
+        plt.savefig(out / "waveform.png", dpi=150)
+        plt.close()
+
+        # 3) Spectrogram: STFT magnitude converted to dB relative to its maximum.
+        D = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
+        DB = librosa.amplitude_to_db(D, ref=np.max)
+        plt.figure(figsize=(12, 4))
+        librosa.display.specshow(DB, sr=sr, hop_length=hop_length,
+                                x_axis="time", y_axis="log")
+        plt.colorbar(format="%+2.0f dB")
+        plt.title("Spectrogram (dB)")
+        plt.tight_layout()
+        plt.savefig(out / "spectrogram.png", dpi=150)
+        plt.close()
+
+        # 4) Save the summary
+        summary = {
+            "audio_path": str(audio_path),
+            "sr": sr,
+            "duration_sec": dur,
+            "outputs": {
+                "waveform": str(out / "waveform.png"),
+                "spectrogram": str(out / "spectrogram.png"),
+            }
+        }
+        # ensure_ascii=False keeps non-ASCII path characters verbatim, so the
+        # file must be written as UTF-8 explicitly instead of relying on the
+        # platform's default encoding.
+        (out / self.SUMMARY_FILENAME).write_text(
+            json.dumps(summary, ensure_ascii=False, indent=2),
+            encoding="utf-8",
+        )
+        self.logger.info(f"{self.name} done: results under {out}")
+
+
+    def execute(self):
+        """Visualize the unified audio file unless a summary already exists."""
+        self.logger.info(f"{self.name} execute started")
+
+        out_dir = f"{self.status.output_dir}/unified"
+        # The directory alone proves nothing: get_visualization() creates it
+        # before doing any work. The summary file is written last, so it is
+        # the reliable completion marker.
+        if os.path.exists(os.path.join(out_dir, self.SUMMARY_FILENAME)):
+            # Already visualized.
+            self.logger.info(f"Visualization already done: {out_dir}")
+            return
+
+        audio_path = self.status.unified_file
+        self.get_visualization(audio_path, out_dir)
+        return
--- a/src/lib/init.py
+++ b/src/lib/init.py
@ -0,0 +1,10 @@
+"""
+This module provides the pengent library.
+"""
+
+# Re-export the logger helpers defined in the sibling custom_logger module.
+from .custom_logger import get_logger, CustomLogger
+
+# Public names exported by a star-import of this module.
+__all__ = [
+    "get_logger",
+    "CustomLogger",
+]
--- a/src/lib/common.py
+++ b/src/lib/common.py
@ -0,0 +1,2 @@
+import re
+
--- a/src/lib/custom_logger.py
+++ b/src/lib/custom_logger.py
@ -0,0 +1,56 @@
+import logging
+import functools
+from .singleton import Singleton
+
+class CustomLogger(Singleton):
+    """Process-wide logger wrapper built on top of ``Singleton``.
+
+    The first construction configures a ``logging`` logger with a console
+    handler and, when ``log_file`` is given, a UTF-8 file handler. Later
+    constructions return early, so their arguments are ignored. The class
+    also offers a decorator that logs entry to and exit from a function.
+    """
+
+    def __init__(self, name='main', log_file=None, level=logging.INFO):
+        # __init__ runs on every construction even though the instance is
+        # shared, so bail out once the logger has been configured.
+        if getattr(self, '_initialized', False):
+            return
+
+        self.logger = logging.getLogger(name)
+        log = self.logger
+        log.setLevel(level)
+        log.propagate = False
+
+        fmt = logging.Formatter(
+            '%(asctime)s %(levelname)s [%(filename)s:%(lineno)3d]: %(message)s'
+        )
+
+        def attach(handler):
+            """Give ``handler`` the shared format and register it."""
+            handler.setFormatter(fmt)
+            log.addHandler(handler)
+
+        # Console output is always enabled.
+        attach(logging.StreamHandler())
+
+        # File output only when a path was supplied.
+        if log_file:
+            attach(logging.FileHandler(log_file, encoding='utf-8'))
+
+        self._initialized = True
+
+    def get_logger(self):
+        """Return the underlying ``logging.Logger``."""
+        return self.logger
+
+    def log_entry_exit(self, func):
+        """Decorate ``func`` so that entering and leaving it is logged.
+
+        The exit line is only logged when ``func`` returns normally.
+        """
+        @functools.wraps(func)
+        def traced(*args, **kwargs):
+            self.logger.info(f"Enter: {func.__qualname__}")
+            outcome = func(*args, **kwargs)
+            self.logger.info(f"Exit: {func.__qualname__}")
+            return outcome
+
+        return traced
+
+
+
+
+
+def get_logger(name='main', log_file=None, level=logging.INFO):
+    """Return the logger held by the shared ``CustomLogger`` instance.
+
+    The arguments are forwarded to ``CustomLogger``; they only take effect
+    on the call that first initializes it.
+    """
+    return CustomLogger(name, log_file, level).get_logger()
--- a/src/lib/singleton.py
+++ b/src/lib/singleton.py
@ -0,0 +1,20 @@
+"""Singleton pattern implementation in Python.
+This implementation is thread-safe and ensures that only one instance of the class is created.
+
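+What Singleton provides is only "a mechanism that returns the same instance":
+* __init__() is still called on every construction (behaviour many people do not expect).
+* Guarding against the second and later __init__() calls, e.g. with an _initialized flag, is the responsibility of the code that uses Singleton.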
+"""
+
+import threading
+
+class Singleton:
+    """Base class whose subclasses each have exactly one shared instance.
+
+    Instances are stored per class in ``_instances``. Creation happens under
+    ``_lock`` and the registry is re-checked after the lock is acquired.
+    """
+    _instances = {}
+    _lock = threading.Lock()
+
+    def __new__(cls, *args, **kwargs):
+        # Fast path: the instance already exists, no locking needed.
+        try:
+            return cls._instances[cls]
+        except KeyError:
+            pass
+
+        with cls._lock:
+            # Second check: another thread may have created it meanwhile.
+            if cls not in cls._instances:
+                cls._instances[cls] = super().__new__(cls)
+        return cls._instances[cls]
--- a/src/main.py
+++ b/src/main.py
@ -0,0 +1,35 @@
+import argparse
+import os
+import sys
+
+from lib.custom_logger import get_logger
+from app_status import AppStatus
+from app import app_start
+
+def main():
+    """Command-line entry point: validate the audio path and start the app.
+
+    Exits with status 1 when the path is not an existing file or when its
+    extension is not one of the supported audio formats.
+    """
+    cli = argparse.ArgumentParser(description="Speech to Text Pipeline")
+    cli.add_argument("filepath", type=str, help="Path to the audio file")
+    args = cli.parse_args()
+    logger = get_logger()
+    filepath = args.filepath
+
+    if not os.path.isfile(filepath):
+        logger.error(f"File not found: {filepath}")
+        sys.exit(1)
+
+    # Make sure the extension is an audio format (convertible to WAV by ffmpeg).
+    valid_extensions = ['.mp3', '.wav', '.flac', '.aac', '.ogg', '.m4a']
+    if not filepath.lower().endswith(tuple(valid_extensions)):
+        logger.error("Invalid file format. Supported formats are: " + ", ".join(valid_extensions))
+        sys.exit(1)
+
+    logger.info(f"Processing file: {filepath}")
+    # Speech recognition processing is to be added here.
+
+    status = AppStatus()
+    status.reset()
+    status.input_filepath = filepath
+    app_start()
+
+
+if __name__ == "__main__":
+    main()
--- a/src/pipeline/app_pipeline.py
+++ b/src/pipeline/app_pipeline.py
@ -0,0 +1,19 @@
+from pipeline.pipeline_base import PipelineBase
+from jobs.job_get_request_id import JobGetRequestId
+from jobs.job_set_soruce_file import JobSetSourceFile
+from jobs.job_standardize_format import JobStandardizeFormat
+from jobs.job_visualize_audio import JobVisualizeAudio
+from jobs.job_chunk_files import JobChunkFiles
+
+class AppPipeline(PipelineBase):
+    """Application pipeline: registers the processing jobs in run order."""
+
+    def __init__(self):
+        super().__init__()
+        self.logger.info("AppPipeline initialized")
+        # Jobs are executed in the order in which they are added.
+        self.add_job(JobGetRequestId())
+        self.add_job(JobSetSourceFile())
+        self.add_job(JobStandardizeFormat())
+        self.add_job(JobChunkFiles())
+        # NOTE(review): JobVisualizeAudio is imported by this module but is
+        # never registered -- confirm whether it should be added here.
+
+
--- a/src/pipeline/pipeline_base.py
+++ b/src/pipeline/pipeline_base.py
@ -0,0 +1,17 @@
+from typing import List
+from jobs.job_base import JobBase
+from lib.custom_logger import get_logger
+logger = get_logger()
+
+class PipelineBase:
+    """Base class for pipelines: an ordered list of jobs run one by one."""
+
+    def __init__(self):
+        self.logger = get_logger()
+        self.jobs: List[JobBase] = []
+
+    def add_job(self, job: JobBase):
+        """Append ``job`` to the end of the execution list."""
+        self.jobs.append(job)
+
+    def run(self):
+        """Call ``execute()`` on every registered job, in insertion order.
+
+        An exception raised by a job is not caught here, so it stops the run.
+        """
+        for step in self.jobs:
+            step.execute()