Skip to content

Commit 57ee83f

Browse files
committed
Rename some modules to semantic fit names
Signed-off-by: Skye <[email protected]>
1 parent f9294dd commit 57ee83f

File tree

8 files changed

+1183
-0
lines changed

8 files changed

+1183
-0
lines changed

audiobot/utils/metadata_utils.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import speech_recognition as sr
2+
import ffmpeg
3+
from ..logging_config import setup_colored_logger
4+
from filemac.utils.colors import fg, rs
5+
6+
# Alias for `rs` from filemac.utils.colors; appended after colored text in
# log messages (presumably the terminal color-reset sequence — confirm).
RESET = rs
7+
8+
# Module-level logger shared by the functions in this module.
Clogger = setup_colored_logger()
9+
10+
11+
def get_bitrate(input_file, verbosity=False):
    """
    Probe a media file with ffmpeg and return the bitrate of its first video stream.

    Args:
        input_file (str): The path to the media file.
        verbosity (bool): When true, log an informational message before probing.

    Returns:
        The ``bit_rate`` value reported for the first stream whose
        ``codec_type`` is ``"video"``, or ``None`` when there is no such
        stream, the stream carries no ``bit_rate`` entry, or probing fails.

    Probe failures are logged through ``Clogger`` and never raised to the caller.
    """
    if verbosity:
        Clogger.info(
            f"Fetch the original bitrate of the video file using {fg.YELLOW_FG}ffmpeg{RESET}."
        )
    try:
        metadata = ffmpeg.probe(input_file)
        # Only the first video stream is considered.
        for stream in metadata.get("streams", []):
            if stream.get("codec_type") == "video":
                return stream.get("bit_rate")
        return None
    except Exception as e:
        # The previous ``except ffmpeg.Error or Exception`` evaluated the
        # ``or`` first and therefore only caught ffmpeg.Error; catching
        # Exception covers ffmpeg.Error as well as everything else.
        Clogger.error(f"Error fetching bitrate for {input_file}: {e}")
        return None
44+
45+
46+
def transcribe_audio(input_file):
    """
    Transcribe the contents of an audio file with ``speech_recognition``.

    The file is opened through ``sr.AudioFile``, recorded in full and sent to
    the recognizer's ``recognize_google`` method.

    Args:
        input_file: Path handed to ``sr.AudioFile``.

    Returns:
        The transcription returned by ``recognize_google``, or ``None`` if
        any step raises (the error is logged, not propagated).
    """
    Clogger.info(f"Transcribing audio: {input_file}")
    try:
        engine = sr.Recognizer()
        with sr.AudioFile(input_file) as audio_source:
            recorded = engine.record(audio_source)
        text = engine.recognize_google(recorded)
        Clogger.info(f"Transcription: {text}")
        return text
    except Exception as e:
        Clogger.error(f"Error transcribing audio file {input_file}: {e}")
        return None

audiobot/utils/visualizer.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import matplotlib.pyplot as plt
2+
import soundfile as sf
3+
from ..logging_config import setup_colored_logger
4+
5+
6+
# Module-level logger shared by the functions in this module.
Clogger = setup_colored_logger()
7+
8+
9+
def visualize_audio_wave(original_file, modified_file):
    """
    Plot the waveforms of two audio files one above the other.

    Both files are read with ``soundfile.read`` and drawn into a two-row
    matplotlib figure, original on top and modified below, which is then
    displayed with ``plt.show()``.

    Args:
        original_file: Path of the unmodified audio file.
        modified_file: Path of the processed audio file.

    Returns:
        None. Any exception raised while reading or plotting is logged and
        swallowed.
    """
    Clogger.info(f"Visualizing audio: {original_file} and {modified_file}")
    try:
        # sf.read yields (data, samplerate); the sample rates are not used.
        original_data, _ = sf.read(original_file)
        modified_data, _ = sf.read(modified_file)

        plt.figure(figsize=(14, 5))
        panels = (
            (1, original_data, "Original Audio Waveform"),
            (2, modified_data, "Modified Audio Waveform"),
        )
        for position, samples, heading in panels:
            plt.subplot(2, 1, position)
            plt.plot(samples)
            plt.title(heading)
        plt.show()

    except Exception as e:
        Clogger.error(f"Error visualizing audio: {e}")

filemac/cli/converter.py

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
import os
2+
import sys
3+
from typing import List, Union
4+
from ..core.warning import default_supressor
5+
from ..utils.simple import logger
6+
from ..utils.colors import fg, rs
7+
from ..core.tts.gtts import GoogleTTS
8+
from ..utils.formats import (
9+
SUPPORTED_AUDIO_FORMATS_DIRECT,
10+
)
11+
from ..core.document import DocConverter
12+
13+
# Alias for `rs` from ..utils.colors; appended after colored text in the
# messages printed below (presumably the terminal color-reset sequence — confirm).
RESET = rs
14+
# Runs at import time. NOTE(review): presumably installs the project's default
# warning filters — confirm against ..core.warning.
default_supressor()
15+
16+
17+
class DirectoryConverter:
    """
    Convert every supported file found under a directory to a target format.

    Used when the input given for conversion is a directory: the tree is
    walked and each file whose extension is in the accepted list is handed to
    ``MethodMappingEngine``. When the target format is one of
    ``SUPPORTED_AUDIO_FORMATS_DIRECT`` the whole directory is delegated to
    ``Batch_Audiofy`` instead.
    """

    def __init__(self, _dir_, _format_, no_resume, threads, _isolate_=None):
        """
        Args:
            _dir_: Root directory to walk.
            _format_: Target output format.
            no_resume: Forwarded to ``Batch_Audiofy`` for audio targets.
            threads: Forwarded to ``Batch_Audiofy`` for audio targets.
            _isolate_: Optional single extension; when given, only files with
                that extension are converted.
        """
        self._dir_ = _dir_
        self._format_ = _format_
        self._isolate_ = _isolate_
        self.no_resume = no_resume
        self.threads = threads
        # Extensions accepted during the walk. In isolation mode only the
        # requested extension is accepted. "pptx" and "xls" used to be fused
        # into the single bogus entry "pptxxls" by a missing comma.
        self._ls_ = (
            ["pdf", "docx", "doc", "xlsx", "ppt", "pptx", "xls", "txt"]
            if _isolate_ is None
            else [_isolate_]
        )
        if self._isolate_:
            print(
                f"INFO\t {fg.FMAGENTA_FG}Isolate {fg.DCYAN_FG}{self._isolate_}{RESET}"
            )

    def _unbundle_dir_(self):
        """
        Walk the directory and convert each accepted file.

        Returns:
            A ``Batch_Audiofy`` instance when the target format is a direct
            audio format, otherwise ``None``.

        ``FileNotFoundError`` and other exceptions are printed and swallowed;
        ``KeyboardInterrupt`` prints a message and exits with status 1.
        """
        if self._format_ in SUPPORTED_AUDIO_FORMATS_DIRECT:
            return Batch_Audiofy(self._dir_, self.no_resume, self.threads)
        try:
            for root, _dirs, files in os.walk(self._dir_):
                for file in files:
                    _ext_ = file.split(".")[-1]
                    _path_ = os.path.join(root, file)

                    if _ext_ in self._ls_ and os.path.exists(_path_):
                        print(
                            f"INFO\t {fg.FYELLOW_FG}Parse {fg.BLUE_FG}{_path_}{RESET}"
                        )
                        init = MethodMappingEngine(_path_, self._format_)
                        init.document_eval()

        except FileNotFoundError as e:
            print(e)

        except KeyboardInterrupt:
            print("\nQuit!")
            sys.exit(1)

        except Exception as e:
            print(e)
67+
68+
69+
class Batch_Audiofy:
    """
    Convert a folder (or list of paths) to audio through ``GoogleTTS``.

    Construction is the whole operation: ``__init__`` stores its arguments and
    immediately runs ``worker()``.
    """

    def __init__(
        self,
        obj: Union[os.PathLike, str, List[Union[os.PathLike, str]]],
        no_resume: bool,
        threads: int = 3,
    ):
        """
        Args:
            obj: A path, or a list of paths, handed to ``GoogleTTS``.
            no_resume: Value forwarded as ``GoogleTTS``'s ``resume`` argument.
            threads: Number of threads passed to ``audiofy``.
        """
        self.folder, self.no_resume, self.threads = obj, no_resume, threads
        self.worker()

    def worker(self):
        """Build the ``GoogleTTS`` converter and run its threaded ``audiofy``."""
        # NOTE(review): ``no_resume`` is passed straight through as
        # ``resume`` — confirm the polarity against GoogleTTS.
        tts = GoogleTTS(self.folder, resume=self.no_resume)
        tts.THAudio(tts).audiofy(num_threads=self.threads)
85+
86+
87+
class MethodMappingEngine:
    """
    Dispatch a document conversion from the source file's extension and the
    requested output format.

    ``document_eval`` is the entry point: it builds a ``DocConverter`` for the
    file and routes to the per-type handler (``spreedsheet``, ``word``,
    ``text``, ``ppt``, ``pdf``), each of which picks the converter method that
    matches the output format.
    """

    def __init__(self, file, outf):
        """
        Args:
            file: Path of the document to convert.
            outf: Requested output format (matched case-insensitively).
        """
        self.file = file
        self.outf = outf
        # Word-processor extensions. Set here rather than only inside
        # document_eval so that spreedsheet() also works when called directly.
        self.doc_ls = ["docx", "doc"]

    def _unsupported_output(self):
        """Report that the requested output format has no handler."""
        print(f"{fg.RED_FG}Unsupported output format❌{RESET}")

    def _to_audio(self):
        """Convert the source file to audio through GoogleTTS."""
        GoogleTTS(self.file).audiofy()

    def spreedsheet(self, conv):
        """Convert a spreadsheet (xls/xlsx) source to csv, text, word or db."""
        outf = self.outf.lower()
        if outf == "csv":
            conv.convert_xlsx_to_csv()
        elif outf in ("txt", "text"):
            conv.convert_xls_to_text()
        elif outf in self.doc_ls:
            conv.convert_xls_to_word()
        elif outf == "db":
            conv.convert_xlsx_to_database()
        else:
            self._unsupported_output()

    def word(self, conv):
        """Convert a word (doc/docx) source to text, pdf, pptx or audio."""
        outf = self.outf.lower()
        if outf in ("txt", "text"):
            conv.word_to_txt()
        elif outf == "pdf":
            conv.word_to_pdf()
        elif outf in ("pptx", "ppt"):
            conv.word_to_pptx()
        elif outf in ("audio", "ogg"):
            self._to_audio()
        else:
            self._unsupported_output()

    def text(self, conv):
        """Convert a plain-text source to pdf, word or audio."""
        outf = self.outf.lower()
        if outf == "pdf":
            conv.txt_to_pdf()
        elif outf in ("doc", "docx", "word"):
            conv.text_to_word()
        elif outf in ("audio", "ogg"):
            self._to_audio()
        else:
            self._unsupported_output()

    def ppt(self, conv):
        """Convert a presentation (ppt/pptx) source to word, text, pptx or audio."""
        outf = self.outf.lower()
        if outf in ("doc", "docx", "word"):
            conv.ppt_to_word()
        elif outf in ("text", "txt"):
            # Two-step conversion: presentation -> word -> text.
            word = conv.ppt_to_word()
            DocConverter(word).word_to_txt()
        elif outf == "pptx":
            # Was ``in ("pptx")``: a substring test against a plain string,
            # which also matched "ppt", "p" and the empty string.
            conv.convert_ppt_to_pptx(self.file)
        elif outf in ("audio", "ogg", "mp3", "wav"):
            self._to_audio()
        else:
            self._unsupported_output()

    def pdf(self, conv):
        """Convert a pdf source to word, text or audio."""
        outf = self.outf.lower()
        if outf in ("doc", "docx", "word"):
            conv.pdf_to_word()
        elif outf in ("txt", "text"):
            conv.pdf_to_txt()
        elif outf in ("audio", "ogg", "mp3", "wav"):
            self._to_audio()
        else:
            self._unsupported_output()

    def document_eval(self):
        """
        Pick the handler for ``self.file`` by its (case-insensitive) suffix
        and run the conversion.

        Any exception raised while building the converter or converting is
        logged through ``logger.error`` and not propagated.
        """
        sheetls = ("xlsx", "xls")
        try:
            conv = DocConverter(self.file)
            name = self.file.lower()

            if name.endswith(sheetls):
                self.spreedsheet(conv=conv)

            elif name.endswith(tuple(self.doc_ls)):
                self.word(conv=conv)

            elif name.endswith("txt"):
                self.text(conv=conv)

            elif name.split(".")[-1] in ("ppt", "pptx"):
                self.ppt(conv)

            elif name.endswith("pdf"):
                self.pdf(conv)

            elif name.endswith("csv"):
                if self.outf.lower() in ("xls", "xlsx", "excel"):
                    conv.convert_csv_to_xlsx()
                else:
                    # Previously an unsupported target was ignored silently.
                    self._unsupported_output()

            else:
                # Was ``fg.fg.BYELLOW_FG`` (doubled attribute access).
                # NOTE(review): assumes ``fg`` exposes BYELLOW_FG — confirm.
                print(f"{fg.BYELLOW_FG}Unsupported Conversion type❌{RESET}")
        except Exception as e:
            logger.error(e)
188+
189+
190+
def _isolate_file(_dir_, target):
191+
try:
192+
isolated_files = []
193+
for root, dirs, files in os.walk(_dir_):
194+
for file in files:
195+
if file.lower().endswith(target):
196+
_path_ = os.path.join(root, file)
197+
isolated_files.append(_path_)
198+
return isolated_files
199+
except FileNotFoundError as e:
200+
print(e)
201+
except KeyboardInterrupt:
202+
print("\nQuit!")
203+
sys.exit(1)
204+
except Exception as e:
205+
print(e)

0 commit comments

Comments
 (0)