File size: 15,922 Bytes
6edd739
 
e4e56ea
 
6edd739
e4e56ea
f7ce4f6
e4e56ea
 
6edd739
 
 
e4e56ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b78e1ce
e4e56ea
 
 
6edd739
e4e56ea
 
 
 
850c648
e4e56ea
 
 
 
 
 
 
850c648
e4e56ea
 
 
 
 
 
 
 
 
 
 
6edd739
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4e56ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6edd739
e4e56ea
 
6edd739
e4e56ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6edd739
 
e4e56ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
import streamlit as st

import os
import tempfile
import gdown
import uuid
import tomllib
from pathlib import Path

# import ffmpeg
from moviepy.video.io.VideoFileClip import VideoFileClip
import cv2

import numpy as np
from io import BytesIO

from pydub import AudioSegment
from pydub.silence import detect_leading_silence
import librosa

import librosa.display as lbd
import matplotlib.pyplot as plt


# Scratch directory created once, when this module is imported. Per-session
# sub-directories are placed beneath it (see get_session_dir and
# save_uploaded_file). The code visible here only removes those
# sub-directories (cleanup_session_files), never TEMP_DIR itself.
TEMP_DIR = tempfile.mkdtemp()


# Name of the TOML configuration file; load_config opens it as a relative
# path, i.e. relative to the current working directory.
CONFIG_FILE = 'app_config.toml'


def load_config():
    '''Reads the application settings from the TOML file named by CONFIG_FILE.

    Returns:
        dict: The parsed configuration. When the file does not exist, a
        built-in fallback is returned instead; the fallback only defines the
        "paths", "models" and "settings" tables.

    Raises:
        Exception: Any failure other than a missing file is printed and then
        re-raised unchanged.
    '''
    fallback = {
        "paths": {"output_dir": "output", "temp_dir": "temp_processing"},
        "models": {"whisper_model": "base.en", "ocr_languages": ["en"], "summarization_model": "google/pegasus-xsum"},
        "settings": {"frame_extraction_interval_seconds": 10, "max_summary_length": 500, "min_summary_length": 100}
    }

    try:
        # tomllib requires the file to be opened in binary mode
        with open(CONFIG_FILE, 'rb') as config_fh:
            parsed = tomllib.load(config_fh)
    except FileNotFoundError:
        print(f'Error: {CONFIG_FILE} not found. Using default settings.')
        return fallback
    except Exception as err:
        print(f'Error loading config: {err}')
        raise  # let the caller see the original error
    return parsed


# Application configuration, loaded once when this module is imported.
CONFIG = load_config()


def ensure_dir(directory_path):
    """Make sure ``directory_path`` exists as a directory.

    Missing parent directories are created as well; a directory that already
    exists is left untouched. Returns nothing.
    """
    target = Path(directory_path)
    target.mkdir(parents=True, exist_ok=True)


def _download_secret_text(file_id, **download_kwargs):
    """Download a file by id with gdown and return its content as text.

    Args:
        file_id: Identifier passed to ``gdown.download`` as ``id``.
        **download_kwargs: Extra keyword arguments forwarded to
            ``gdown.download`` (``output`` and ``quiet`` are set here).

    Returns:
        str: The downloaded bytes decoded as UTF-8. If nothing was written to
        the target path, this is an empty string.

    Raises:
        UnicodeDecodeError: If the downloaded bytes are not valid UTF-8.
        Exception: Whatever ``gdown.download`` raises is propagated.
    """
    # Reserve a unique path and close our own handle right away. The content is
    # read back by path after the download, so the result does not depend on
    # whether the downloader writes in place or replaces the file, and no
    # second handle is open on the file while it is being written.
    fd, tmp_path = tempfile.mkstemp()
    os.close(fd)
    try:
        gdown.download(id=file_id, output=tmp_path, quiet=True, **download_kwargs)
        with open(tmp_path, 'rb') as downloaded:
            return downloaded.read().decode('utf-8')
    finally:
        # The file holds a secret: remove it even when downloading or decoding failed.
        try:
            os.remove(tmp_path)
        except FileNotFoundError:
            pass


def get_secret_api():
    """Return the text of the file referenced by CONFIG['links']['secret_api_id'].

    The download is requested with ``fuzzy=True`` and ``use_cookies=True``.
    """
    return _download_secret_text(
        CONFIG['links']['secret_api_id'],
        fuzzy=True,
        use_cookies=True,
    )


def get_secret_prompt():
    """Return the text of the file referenced by CONFIG['links']['secret_prompt_id']."""
    return _download_secret_text(CONFIG['links']['secret_prompt_id'])


def save_uploaded_file(uploaded_file):
    """Save an uploaded file into the current session's temporary directory.

    Args:
        uploaded_file: Object exposing ``name`` and ``getbuffer()``
            (presumably a Streamlit UploadedFile - confirm against the
            caller), or None.

    Returns:
        str | None: Path of the written file, or None when ``uploaded_file``
        is None.
    """
    if uploaded_file is None:
        return None

    # All files of one session share a sub-directory of TEMP_DIR
    upload_dir = get_session_dir()
    os.makedirs(upload_dir, exist_ok=True)

    # The name is supplied by the client: keep only its last path component
    # (for both '/' and '\\' separators) so it cannot point outside upload_dir.
    safe_name = os.path.basename(uploaded_file.name.replace('\\', '/'))
    file_path = os.path.join(upload_dir, safe_name)

    with open(file_path, 'wb') as f:
        f.write(uploaded_file.getbuffer())
    print(f'File saved to: {file_path}')  # debugging
    return file_path


def get_session_id():
    """Return the session's identifier, creating one on first use.

    The identifier is the first 8 hexadecimal characters of a random UUID and
    is stored in ``st.session_state['session_id']`` so later calls reuse it.
    """
    state = st.session_state
    if 'session_id' in state:
        return state['session_id']

    # str(uuid4)[:8] and uuid4().hex[:8] are the same eight hex digits
    state['session_id'] = uuid.uuid4().hex[:8]
    return state['session_id']


def get_session_dir():
    """Return the current session's directory path under TEMP_DIR.

    Only the path is built here; the directory itself is not created.
    """
    return os.path.join(TEMP_DIR, get_session_id())


def get_temp_dir():
    """Return the configured processing directory, creating it when missing.

    The location comes from CONFIG['paths']['temp_dir']. The same directory is
    returned on every call; it is given back as a plain string rather than a
    Path object.
    """
    processing_dir = Path(CONFIG['paths']['temp_dir'])
    processing_dir.mkdir(parents=True, exist_ok=True)
    # NOTE: every run shares this directory. If isolation is ever needed, a
    # unique sub-directory per run (tempfile.mkdtemp(dir=...)) could be
    # returned instead.
    return str(processing_dir)


def extract_audio(video_path, audio_format='wav'):
    """Extract the audio track of a video into a file next to it using moviepy.

    The output is written to ``<video stem>_audio.<audio_format>`` in the
    video's own directory. An already existing output file is reused without
    opening the video.

    Args:
        video_path: Path of the source video.
        audio_format: Output extension; 'wav' is written with the
            'pcm_s16le' codec, anything else with 'mp3'.

    Returns:
        str | None: Path of the audio file, or None when the video has no
        audio track or any error occurred (errors are printed, not raised).
    """
    # Bound up front so the error handler can test them safely even when the
    # failure happens before they receive a real value.
    audio_path = None
    video_clip = None
    audio_clip = None
    try:
        session_dir = os.path.dirname(video_path)  # assumes video is in session dir
        base_name = os.path.splitext(os.path.basename(video_path))[0]
        audio_path = os.path.join(session_dir, f"{base_name}_audio.{audio_format}")

        if os.path.exists(audio_path):
            print(f"Audio file already exists: {audio_path}")
            return audio_path

        print(f"Extracting audio from {video_path} to {audio_path}...")
        try:
            video_clip = VideoFileClip(video_path)
            audio_clip = video_clip.audio
            if audio_clip is None:
                print("No audio track found in the video.")
                return None
            # WAV is often better for STT
            codec = 'pcm_s16le' if audio_format == 'wav' else 'mp3'
            audio_clip.write_audiofile(audio_path, codec=codec)
        finally:
            # Release the clips on every path, and before any partial output
            # file is removed below.
            if audio_clip is not None:
                audio_clip.close()
            if video_clip is not None:
                video_clip.close()

        print("Audio extraction complete.")
        return audio_path
    except Exception as e:
        print(f"Error extracting audio: {e}")
        # Remove a partial file left behind by a failed write. An existing
        # file from an earlier run never reaches this point (early return).
        if audio_path is not None and os.path.exists(audio_path):
            try:
                os.remove(audio_path)
            except OSError as rm_e:
                print(f"Could not remove partial audio file {audio_path}: {rm_e}")
        return None


from scenedetect import open_video, SceneManager
from scenedetect.detectors import ContentDetector


def extract_frames_pyscenedetect(video_path, output_dir, threshold=2.0):
    """Save the first frame of every scene found by PySceneDetect's ContentDetector.

    Scenes are detected over the whole video first; the video is then opened
    again with OpenCV and the frame at each scene start is written to
    ``output_dir`` as ``frame_<seconds, 6 digits>.jpg``. The seconds value is
    the capture position reported by OpenCV after reading the frame.
    Because the name only has whole-second resolution, scenes starting within
    the same second map to the same file name.

    Args:
        video_path: Path of the video to analyse.
        output_dir: Directory for the frame images (created if missing).
        threshold: Threshold passed to ``ContentDetector``.

    Returns:
        tuple[str, list[str]] | None: ``(output_dir, frame paths)``, or None
        when OpenCV cannot open the video.
    """
    os.makedirs(output_dir, exist_ok=True)  # ensure the output dir exists

    # Detect the scene changes
    video = open_video(video_path)
    scene_manager = SceneManager()
    scene_manager.add_detector(ContentDetector(threshold=threshold))
    scene_manager.detect_scenes(video)

    scene_list = scene_manager.get_scene_list()
    print(scene_list)
    print(f'Обнаружено {len(scene_list)} смен сцен.')

    # Save the first frame of each scene
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f'Error: Could not open video file {video_path}')
        return None

    extracted_frame_paths = []
    try:
        for start_time, _ in scene_list:
            frame_num = start_time.get_frames()
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
            success, frame = cap.read()
            if not success:
                print(f'[!] Ошибка при чтении кадра {frame_num}')
                continue

            timestamp_ms = cap.get(cv2.CAP_PROP_POS_MSEC)
            frame_filename = f'frame_{int(timestamp_ms / 1000):06d}.jpg'  # naming by seconds
            frame_path = os.path.join(output_dir, frame_filename)
            cv2.imwrite(frame_path, frame)
            print(f'[*] Сохранён кадр {frame_num} в {frame_path}')
            extracted_frame_paths.append(frame_path)
    finally:
        # Release the capture even if reading or writing a frame raised
        cap.release()

    print(f'Extracted {len(extracted_frame_paths)} frames to {output_dir}.')
    return output_dir, extracted_frame_paths


def extract_frames_interval(video_path, output_dir, interval_sec=5):
    '''Extracts frames from video at specified intervals using OpenCV.

    Frame 0 and then every ``int(fps * interval_sec)``-th frame is written to
    ``output_dir`` as ``frame_<seconds, 6 digits>.png``, where the seconds
    value is the capture position reported by OpenCV for that frame.

    Args:
        video_path: Path of the video to read.
        output_dir: Directory for the frame images (created if missing).
        interval_sec: Distance between saved frames in seconds. Intervals
            shorter than one frame fall back to saving every frame.

    Returns:
        ``(output_dir, frame paths)`` on success, ``None`` when the video
        cannot be opened, and ``(None, [])`` when any other error occurs
        (the error is printed, not raised).
    '''
    cap = None
    try:
        os.makedirs(output_dir, exist_ok=True)  # ensure the output dir exists

        print(f'Extracting frames from {video_path} every {interval_sec}s..')
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f'Error: Could not open video file {video_path}')
            return None

        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:  # 0, negative or NaN: the container did not report a usable rate
            print('Warning: Could not get FPS, defaulting to 30.')
            fps = 30  # provide a default if FPS is not available

        # At least 1, so the modulo below can never divide by zero
        frame_interval = max(1, int(fps * interval_sec))
        extracted_frame_paths = []

        # grab() advances one frame without decoding it; only the frames that
        # are actually saved get decoded via retrieve().
        frame_index = 0
        while cap.grab():
            if frame_index % frame_interval == 0:
                success, frame = cap.retrieve()
                if success:
                    timestamp_ms = cap.get(cv2.CAP_PROP_POS_MSEC)
                    frame_filename = f'frame_{int(timestamp_ms / 1000):06d}.png'  # naming by seconds
                    frame_path = os.path.join(output_dir, frame_filename)
                    cv2.imwrite(frame_path, frame)
                    extracted_frame_paths.append(frame_path)
                else:
                    # Handle case where retrieve fails after grab
                    print(f'Warning: Failed to retrieve frame after grab at frame count {frame_index}')
            frame_index += 1

        print(f'Extracted {len(extracted_frame_paths)} frames to {output_dir}.')
        return output_dir, extracted_frame_paths
    except Exception as e:
        print(f'Error extracting frames: {e}')
        return None, []
    finally:
        # Release the capture on every exit path
        if cap is not None:
            cap.release()


# --- Add other potential helpers: yt-dlp download, file cleanup etc. ---
def download_youtube(url, output_dir):
    """Download a single YouTube video with yt-dlp.

    Args:
        url: Video URL; if it belongs to a playlist only that video is fetched.
        output_dir: Directory the file is written to, named after the title.

    Returns:
        str | None: The file name yt-dlp derives for the downloaded video, or
        None when the download failed (the error is printed, not raised).
    """
    import yt_dlp

    def _print_status(progress):
        # basic progress
        print(progress['status'])

    options = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
        'outtmpl': os.path.join(output_dir, '%(title)s.%(ext)s'),
        'noplaylist': True,  # download only single video if URL is part of playlist
        'progress_hooks': [_print_status],
    }

    try:
        print(f'Attempting to download YouTube video: {url}')
        with yt_dlp.YoutubeDL(options) as downloader:
            video_info = downloader.extract_info(url, download=True)
            # Ask yt-dlp which file name it used for this video
            saved_as = downloader.prepare_filename(video_info)
            print(f"YouTube video downloaded to: {saved_as}")
            return saved_as
    except Exception as err:
        print(f"Error downloading YouTube video: {err}")
        return None


def cleanup_session_files(session_id):
    """Delete the session's sub-directory of TEMP_DIR, if it exists.

    Failures while deleting are printed and otherwise ignored; nothing is
    returned.
    """
    target_dir = os.path.join(TEMP_DIR, session_id)
    if not os.path.exists(target_dir):
        return  # nothing was stored for this session

    import shutil
    try:
        shutil.rmtree(target_dir)
        print(f"Cleaned up temporary files for session: {session_id}")
    except Exception as err:
        print(f"Error cleaning up session files {target_dir}: {err}")




###
###=== Audio Loading and Processing
###


# Sample rate audio is converted to in proc_raw_audio; also the default `sr`
# of get_features.
SAMPLE_RATE = 22050
# Not referenced by the code visible in this file; presumably the clip length
# in seconds - TODO confirm.
DURATION = 5

# The four values below are passed to librosa.feature.mfcc in obtain_features.
n_mfcc = 13  # number of MFCCs to extract from each sample
n_mels = 128

n_fft = 2048
hop_length = 512

# Not read by the code visible in this file (obtain_features has a parameter
# of the same name that it does not use).
delta_width = 9  # MFCC Delta parameter


def trim_silence(sound, s_thresh=-28.0):
    '''Cuts leading and trailing silence off the sound

    Silence is measured with pydub's detect_leading_silence using `s_thresh`
    as the threshold; the trailing part is measured on the reversed sound.
    When a measured silence spans the whole sound, that side is left uncut,
    so an entirely silent sound is returned in full.
    '''
    total_len = len(sound)

    lead_len = detect_leading_silence(sound, s_thresh)
    tail_len = detect_leading_silence(sound.reverse(), s_thresh)

    # None keeps the corresponding slice bound open
    if lead_len == total_len:
        first = None
    else:
        first = lead_len

    if tail_len == total_len:
        last = None
    else:
        last = total_len - tail_len

    return sound[first:last]


def normalize_volume(sound, target_dBFS=-20.0):
    '''Normalizes sound and shifts to specified loudness

    The sound is normalized first, then gained by the difference between
    `target_dBFS` and its resulting loudness (`sound.dBFS`).

    A completely silent sound reports a loudness of -inf; no finite gain can
    move it to the target, so it is returned as normalized without any gain
    instead of being given an infinite gain.
    '''
    sound = sound.normalize()
    current_dBFS = sound.dBFS
    if current_dBFS == float('-inf'):
        return sound
    return sound.apply_gain(target_dBFS - current_dBFS)


def proc_raw_audio(audio_data, from_start=0, duration=None, before_end=0):
    '''Turns raw audio bytes into a cleaned-up WAV byte string and a sample array

    Steps: decode with pydub, convert to SAMPLE_RATE and one channel,
    normalize the volume, trim silence at both ends, then cut the requested
    window.

    Args:
        audio_data: Encoded audio file content as bytes.
        from_start: Seconds to skip at the beginning (after trimming).
        duration: Window length in seconds; when falsy the window runs up to
            `before_end` seconds before the end instead.
        before_end: Seconds to drop at the end; only used without `duration`.

    Returns:
        tuple: (WAV-encoded bytes, numpy float array of the sample values).
        The sample values are converted to float as they are, without scaling.
    '''
    # Decode, then bring to the common sample rate and to mono
    segment = AudioSegment.from_file(BytesIO(audio_data))
    segment = segment.set_frame_rate(SAMPLE_RATE)
    segment = segment.set_channels(1)

    # Level the volume first, then strip silence from both ends
    segment = trim_silence(normalize_volume(segment))

    # Window bounds, converted from seconds to milliseconds
    clip_start = from_start * 1000
    clip_end = (clip_start + duration * 1000) if duration else (len(segment) - before_end * 1000)
    segment = segment[clip_start:clip_end]

    # WAV representation
    wav_buffer = BytesIO()
    segment.export(wav_buffer, format='wav')
    audio_wav = wav_buffer.getvalue()

    # Signal as numpy array
    audio_np = np.array(segment.get_array_of_samples(), dtype='float')

    return audio_wav, audio_np


###==============================================


def obtain_features(y, sr=22050, duration=5, delta_width=9):
    '''Extracts MFCC features from given signal and returns them as a numpy array

    Args:
        y: Audio signal.
        sr: Sample rate of `y`.
        duration: Unused; kept so existing callers keep working.
        delta_width: Unused; kept so existing callers keep working.

    Returns:
        np.ndarray of shape (n_mfcc, t), computed with the module-level
        n_mfcc, n_mels, n_fft and hop_length settings.
    '''
    # The signal and sample rate are passed by keyword: current librosa
    # releases accept them only as keyword arguments.
    return librosa.feature.mfcc(
        y=y, sr=sr,
        n_mfcc=n_mfcc, n_mels=n_mels,
        n_fft=n_fft, hop_length=hop_length,
    )


def create_features_array(mfcc):
    '''Builds one feature row of per-coefficient means and variances

    Args:
        mfcc: 2-D numpy array; statistics are taken along axis 1, i.e. one
            mean and one variance per row.

    Returns:
        list: A list holding a single list of the form
        [mean_0, var_0, mean_1, var_1, ...].
    '''
    row_means = mfcc.mean(axis=1)
    row_vars = mfcc.var(axis=1)

    # Interleave so each row's mean is directly followed by its variance
    interleaved = []
    for row_mean, row_var in zip(row_means, row_vars):
        interleaved.extend((row_mean, row_var))

    return [interleaved]

# def get_features(y, sr=22050, duration=5, delta_width=9):
#     '''Returns numpy array of sound features obtained from signal'''
#     return create_features_array(*obtain_features(y, sr, duration, delta_width))


def get_features(y, duration=5, sr=SAMPLE_RATE):
    '''Plots the signal in the Streamlit app and returns its mel spectrogram

    A wave plot and a dB-scaled mel spectrogram are drawn side by side and
    rendered with st.pyplot. The spectrogram is then right-padded along the
    time axis to at least 216 frames (using its minimum value) and returned.

    Args:
        y: Audio signal as a numpy array.
        duration: Unused; kept so existing callers keep working.
        sr: Sample rate of `y`.

    Returns:
        np.ndarray of shape (1, n_mels, frames) with frames >= 216. Longer
        spectrograms are not truncated.
    '''
    # 216 matches 1 + (5 * 22050) // 512, the frame count of a 5 s clip at
    # 22050 Hz with a hop of 512 - presumably the width the consumer of
    # these features expects; confirm before changing.
    min_frames = 216

    # MELSPEC (dB scale, relative to the maximum)
    melspec = librosa.feature.melspectrogram(y=y, sr=sr)
    melspec = librosa.power_to_db(np.abs(melspec), ref=np.max)

    fig, axes = plt.subplots(1, 2, figsize=(24, 2))
    try:
        # WAVE PLOT
        axes[0].set_title(f'Wave Plot for audio sample at {sr} hz')
        axes[0].set_facecolor('#B4E8CF')
        lbd.waveshow(y, sr=sr, color='#4300FF', ax=axes[0])

        # MELSPEC PLOT
        axes[1].set_title(f'Mel Spectogram | shape: {melspec.shape}')
        lbd.specshow(melspec, cmap='viridis', y_axis='mel', x_axis='time', ax=axes[1])

        st.pyplot(fig)
    finally:
        # Figures created through pyplot stay registered until closed;
        # without this every call would leave one more figure in memory.
        plt.close(fig)

    # Prepare melspec for use: pad the time axis on the right, add a leading axis
    missing_frames = max(0, min_frames - melspec.shape[1])
    melspec = np.pad(
        melspec,
        [(0, 0), (0, missing_frames)],
        constant_values=melspec.min(),
    )
    melspec = melspec.reshape(1, *melspec.shape)

    return melspec