|
| 1 | +#include <filesystem> |
| 2 | +#include <algorithm> |
1 | 3 | #include "common.h"
|
2 | 4 | #include "common-whisper.h"
|
3 | 5 |
|
@@ -1051,6 +1053,7 @@ int main(int argc, char ** argv) {
|
1051 | 1053 | }
|
1052 | 1054 | }
|
1053 | 1055 |
|
| 1056 | + bool processed_any = false; |
1054 | 1057 | for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
|
1055 | 1058 | const auto & fname_inp = params.fname_inp[f];
|
1056 | 1059 | struct fout_factory {
|
@@ -1105,10 +1108,43 @@ int main(int argc, char ** argv) {
|
1105 | 1108 | std::vector<float> pcmf32; // mono-channel F32 PCM
|
1106 | 1109 | std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
1107 | 1110 |
|
1108 |
| - if (!::read_audio_data(fname_inp, pcmf32, pcmf32s, params.diarize)) { |
1109 |
| - fprintf(stderr, "error: failed to read audio file '%s'\n", fname_inp.c_str()); |
1110 |
| - continue; |
1111 |
| - } |
| 1111 | + std::string ext; |
| 1112 | +if (fname_inp != "-") { |
| 1113 | + try { |
| 1114 | + ext = std::filesystem::path(fname_inp).extension().string(); |
| 1115 | + std::transform(ext.begin(), ext.end(), ext.begin(), |
| 1116 | + [](unsigned char c){ return std::tolower(c); }); |
| 1117 | + } catch (...) { |
| 1118 | + // ignore; let the decoder try |
| 1119 | + } |
| 1120 | +} |
| 1121 | + |
| 1122 | +auto ext_supported = [](const std::string &e) { |
| 1123 | + // expects a lowercase extension including its leading dot (e.g. ".wav"); keep in sync with usage: "supported audio formats: flac, mp3, ogg, wav" |
| 1124 | + return e == ".wav" || e == ".mp3" || e == ".flac" || e == ".ogg"; |
| 1125 | +}; |
| 1126 | + |
| 1127 | +if (fname_inp != "-" && !ext.empty() && !ext_supported(ext)) { |
| 1128 | + fprintf(stderr, |
| 1129 | + "error: unsupported audio extension '%s' for '%s'.\n" |
| 1130 | + "supported: flac, mp3, ogg, wav.\n" |
| 1131 | + "hint: convert with ffmpeg, e.g.:\n" |
| 1132 | + " ffmpeg -i \"%s\" -ar 16000 -ac 1 -c:a pcm_s16le out.wav\n", |
| 1133 | + ext.c_str(), fname_inp.c_str(), fname_inp.c_str()); |
| 1134 | + continue; |
| 1135 | +} |
| 1136 | + |
| 1137 | +// Try to read/decode the audio. If it fails, give an actionable hint. |
| 1138 | +if (!::read_audio_data(fname_inp, pcmf32, pcmf32s, params.diarize)) { |
| 1139 | + std::string det = ext.empty() ? "" : (" (detected extension: " + ext + ")"); |
| 1140 | + fprintf(stderr, |
| 1141 | + "error: failed to decode audio from '%s'%s.\n" |
| 1142 | + "Make sure the file is not corrupted and has one of: flac, mp3, ogg, wav.\n" |
| 1143 | + "If you still hit this, convert to a standard WAV with:\n" |
| 1144 | + " ffmpeg -i \"%s\" -ar 16000 -ac 1 -c:a pcm_s16le out.wav\n", |
| 1145 | + fname_inp.c_str(), det.c_str(), fname_inp.c_str()); |
| 1146 | + continue; |
| 1147 | +} |
1112 | 1148 |
|
1113 | 1149 | if (!whisper_is_multilingual(ctx)) {
|
1114 | 1150 | if (params.language != "en" || params.translate) {
|
@@ -1258,6 +1294,8 @@ int main(int argc, char ** argv) {
|
1258 | 1294 | fprintf(stderr, "%s: failed to process audio\n", argv[0]);
|
1259 | 1295 | return 10;
|
1260 | 1296 | }
|
| 1297 | + processed_any = true; |
| 1298 | + |
1261 | 1299 | }
|
1262 | 1300 |
|
1263 | 1301 | // output stuff
|
@@ -1286,7 +1324,7 @@ int main(int argc, char ** argv) {
|
1286 | 1324 | }
|
1287 | 1325 | }
|
1288 | 1326 |
|
1289 |
| - if (!params.no_prints) { |
| 1327 | + if (processed_any && !params.no_prints) { |
1290 | 1328 | whisper_print_timings(ctx);
|
1291 | 1329 | }
|
1292 | 1330 | whisper_free(ctx);
|
|
0 commit comments