feat: batch video duration fetching and DB updates, with yt-dlp retries
This commit is contained in:
100
src/main.rs
100
src/main.rs
@@ -1,3 +1,4 @@
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use scraper::{Html, Selector};
|
||||
use regex::Regex;
|
||||
use std::fs::File;
|
||||
@@ -5,9 +6,10 @@ use std::io::Read;
|
||||
use std::process::Command;
|
||||
use rusqlite::{Connection, Result};
|
||||
use std::time::Instant;
|
||||
use rayon::prelude::*;
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let db = Connection::open("video_ids.db")?;
|
||||
let mut db = Connection::open("video_ids.db")?;
|
||||
|
||||
db.execute(
|
||||
"CREATE TABLE IF NOT EXISTS video_ids (
|
||||
@@ -17,12 +19,60 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
[],
|
||||
)?;
|
||||
|
||||
get_ids_history(db)?;
|
||||
get_ids_history(&mut db)?;
|
||||
|
||||
let mut stmt = db.prepare("SELECT id FROM video_ids WHERE duration IS NULL OR duration = ''")?;
|
||||
let ids_to_fetch: Vec<String> = stmt.query_map([], |row| row.get(0))?
|
||||
.collect::<Result<Vec<String>, _>>()?;
|
||||
stmt.finalize()?;
|
||||
|
||||
let total_ids = ids_to_fetch.len();
|
||||
if total_ids == 0 {
|
||||
println!("No IDs to fetch.");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let progress_bar = ProgressBar::new(total_ids as u64);
|
||||
progress_bar.set_style(ProgressStyle::default_bar()
|
||||
.template("{msg} [{elapsed_precise}] {wide_bar} {pos}/{len} ({eta})")?
|
||||
.progress_chars("##-"));
|
||||
progress_bar.set_message("Fetching video durations...");
|
||||
|
||||
let start_time = Instant::now();
|
||||
let batch_size = 30;
|
||||
let results: Vec<(String, String)> = ids_to_fetch
|
||||
.par_chunks(batch_size)
|
||||
.flat_map(|chunk| {
|
||||
chunk.iter().map(|id| {
|
||||
let duration = get_video_duration(id).unwrap_or_else(|_| "Error".to_string());
|
||||
progress_bar.inc(1);
|
||||
(id.clone(), duration)
|
||||
}).collect::<Vec<_>>()
|
||||
})
|
||||
.collect();
|
||||
|
||||
let elapsed_time = start_time.elapsed();
|
||||
println!("Fetching video durations took: {:?}", elapsed_time);
|
||||
|
||||
let update_start = Instant::now();
|
||||
let tx = db.transaction()?;
|
||||
{
|
||||
let mut stmt = tx.prepare_cached("UPDATE video_ids SET duration = ? WHERE id = ?")?;
|
||||
for (id, duration) in results {
|
||||
stmt.execute(rusqlite::params![duration, id])?;
|
||||
progress_bar.inc(1);
|
||||
}
|
||||
}
|
||||
tx.commit()?;
|
||||
progress_bar.finish_with_message("Done!");
|
||||
let update_duration = update_start.elapsed();
|
||||
println!("Updating database took: {:?}", update_duration);
|
||||
println!("Total time taken: {:?}", elapsed_time + update_duration);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_ids_history(mut db: Connection) -> Result<(), Box<dyn std::error::Error>> {
|
||||
fn get_ids_history(db: &mut Connection) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let read_start = Instant::now();
|
||||
|
||||
let mut id_db: Vec<&str> = Vec::new();
|
||||
@@ -55,7 +105,7 @@ fn get_ids_history(mut db: Connection) -> Result<(), Box<dyn std::error::Error>>
|
||||
|
||||
// database inserting
|
||||
let insert_start = Instant::now();
|
||||
|
||||
|
||||
let tx = db.transaction()?;
|
||||
{
|
||||
let mut stmt = tx.prepare_cached("INSERT OR IGNORE INTO video_ids (id) VALUES (?)")?;
|
||||
@@ -75,20 +125,32 @@ fn get_ids_history(mut db: Connection) -> Result<(), Box<dyn std::error::Error>>
|
||||
fn get_video_duration(id: &str) -> Result<String, Box<dyn std::error::Error>> {
|
||||
let url = format!("https://www.youtube.com/watch?v={}", id);
|
||||
|
||||
let output = Command::new("yt-dlp")
|
||||
.args(&[
|
||||
"--get-duration",
|
||||
url.as_str(),
|
||||
])
|
||||
.output()?;
|
||||
// Add timeout and retry logic
|
||||
let max_retries = 3;
|
||||
let mut retries = 0;
|
||||
|
||||
if output.status.success() {
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let video_url = stdout.lines().next().unwrap_or("");
|
||||
return Ok(video_url.to_string());
|
||||
} else {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
eprintln!("Error: {}", stderr);
|
||||
return Err("Failed to get video link".into());
|
||||
while retries < max_retries {
|
||||
let output = Command::new("yt-dlp")
|
||||
.args(&[
|
||||
"--get-duration",
|
||||
"--no-warnings",
|
||||
"--socket-timeout", "10",
|
||||
url.as_str(),
|
||||
])
|
||||
.output()?;
|
||||
|
||||
if output.status.success() {
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let duration = stdout.trim();
|
||||
|
||||
if !duration.is_empty() {
|
||||
return Ok(duration.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
retries += 1;
|
||||
std::thread::sleep(std::time::Duration::from_millis(500 * retries));
|
||||
}
|
||||
}
|
||||
|
||||
Err(format!("Failed to get duration for video ID: {}", id).into())
|
||||
}
|
||||
Reference in New Issue
Block a user