From 4f1e6cea387412ac9fe831cb781729fc1e40a76a Mon Sep 17 00:00:00 2001
From: Izan Gil <66965250+SrIzan10@users.noreply.github.com>
Date: Sun, 13 Apr 2025 16:43:35 +0200
Subject: [PATCH] feat: batch stuff

---
 src/main.rs | 100 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 81 insertions(+), 19 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index bb71662..7649434 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
+use indicatif::{ProgressBar, ProgressStyle};
 use scraper::{Html, Selector};
 use regex::Regex;
 use std::fs::File;
@@ -5,9 +6,10 @@ use std::io::Read;
 use std::process::Command;
 use rusqlite::{Connection, Result};
 use std::time::Instant;
+use rayon::prelude::*;
 
 fn main() -> Result<(), Box<dyn std::error::Error>> {
-    let db = Connection::open("video_ids.db")?;
+    let mut db = Connection::open("video_ids.db")?;
 
     db.execute(
         "CREATE TABLE IF NOT EXISTS video_ids (
@@ -17,12 +19,60 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         [],
     )?;
 
-    get_ids_history(db)?;
+    get_ids_history(&mut db)?;
+
+    let mut stmt = db.prepare("SELECT id FROM video_ids WHERE duration IS NULL OR duration = ''")?;
+    let ids_to_fetch: Vec<String> = stmt.query_map([], |row| row.get(0))?
+        .collect::<Result<Vec<String>, _>>()?;
+    stmt.finalize()?;
+
+    let total_ids = ids_to_fetch.len();
+    if total_ids == 0 {
+        println!("No IDs to fetch.");
+        return Ok(());
+    }
+
+    let progress_bar = ProgressBar::new(total_ids as u64);
+    progress_bar.set_style(ProgressStyle::default_bar()
+        .template("{msg} [{elapsed_precise}] {wide_bar} {pos}/{len} ({eta})")?
+        .progress_chars("##-"));
+    progress_bar.set_message("Fetching video durations...");
+
+    let start_time = Instant::now();
+    let batch_size = 30;
+    let results: Vec<(String, String)> = ids_to_fetch
+        .par_chunks(batch_size)
+        .flat_map(|chunk| {
+            chunk.iter().map(|id| {
+                let duration = get_video_duration(id).unwrap_or_else(|_| "Error".to_string());
+                progress_bar.inc(1);
+                (id.clone(), duration)
+            }).collect::<Vec<_>>()
+        })
+        .collect();
+
+    let elapsed_time = start_time.elapsed();
+    println!("Fetching video durations took: {:?}", elapsed_time);
+
+    let update_start = Instant::now();
+    let tx = db.transaction()?;
+    {
+        let mut stmt = tx.prepare_cached("UPDATE video_ids SET duration = ? WHERE id = ?")?;
+        for (id, duration) in results {
+            stmt.execute(rusqlite::params![duration, id])?;
+            progress_bar.inc(1);
+        }
+    }
+    tx.commit()?;
+    progress_bar.finish_with_message("Done!");
+    let update_duration = update_start.elapsed();
+    println!("Updating database took: {:?}", update_duration);
+    println!("Total time taken: {:?}", elapsed_time + update_duration);
 
     Ok(())
 }
 
-fn get_ids_history(mut db: Connection) -> Result<(), Box<dyn std::error::Error>> {
+fn get_ids_history(db: &mut Connection) -> Result<(), Box<dyn std::error::Error>> {
     let read_start = Instant::now();
     let mut id_db: Vec<&str> = Vec::new();
 
@@ -55,7 +105,7 @@ fn get_ids_history(mut db: Connection) -> Result<(), Box<dyn std::error::Error>>
 
     // database inserting
     let insert_start = Instant::now();
-    
+
     let tx = db.transaction()?;
     {
         let mut stmt = tx.prepare_cached("INSERT OR IGNORE INTO video_ids (id) VALUES (?)")?;
@@ -75,20 +125,32 @@ fn get_ids_history(mut db: Connection) -> Result<(), Box<dyn std::error::Error>>
 
 fn get_video_duration(id: &str) -> Result<String, Box<dyn std::error::Error>> {
     let url = format!("https://www.youtube.com/watch?v={}", id);
-    let output = Command::new("yt-dlp")
-        .args(&[
-            "--get-duration",
-            url.as_str(),
-        ])
-        .output()?;
+    // Add timeout and retry logic
+    let max_retries = 3;
+    let mut retries = 0;
 
-    if output.status.success() {
-        let stdout = String::from_utf8_lossy(&output.stdout);
-        let video_url = stdout.lines().next().unwrap_or("");
-        return Ok(video_url.to_string());
-    } else {
-        let stderr = String::from_utf8_lossy(&output.stderr);
-        eprintln!("Error: {}", stderr);
-        return Err("Failed to get video link".into());
+    while retries < max_retries {
+        let output = Command::new("yt-dlp")
+            .args(&[
+                "--get-duration",
+                "--no-warnings",
+                "--socket-timeout", "10",
+                url.as_str(),
+            ])
+            .output()?;
+
+        if output.status.success() {
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            let duration = stdout.trim();
+
+            if !duration.is_empty() {
+                return Ok(duration.to_string());
+            }
+        }
+
+        retries += 1;
+        std::thread::sleep(std::time::Duration::from_millis(500 * retries));
     }
-}
+
+    Err(format!("Failed to get duration for video ID: {}", id).into())
+}
\ No newline at end of file