Various cleanups

This commit is contained in:
FliegendeWurst 2021-04-21 21:38:08 +02:00
parent e94ce97896
commit 338c97821b
4 changed files with 58 additions and 165 deletions

59
Cargo.lock generated
View File

@ -17,7 +17,6 @@ dependencies = [
"indicatif", "indicatif",
"keyring", "keyring",
"once_cell", "once_cell",
"parking_lot",
"regex", "regex",
"reqwest", "reqwest",
"rpassword", "rpassword",
@ -832,15 +831,6 @@ dependencies = [
"regex", "regex",
] ]
[[package]]
name = "instant"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec"
dependencies = [
"cfg-if",
]
[[package]] [[package]]
name = "ipnet" name = "ipnet"
version = "2.3.0" version = "2.3.0"
@ -885,15 +875,6 @@ version = "0.2.93"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41" checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41"
[[package]]
name = "lock_api"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a3c91c24eae6777794bb1997ad98bbb87daf92890acab859f7eaa4320333176"
dependencies = [
"scopeguard",
]
[[package]] [[package]]
name = "log" name = "log"
version = "0.4.14" version = "0.4.14"
@ -1101,31 +1082,6 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
[[package]]
name = "parking_lot"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d7744ac029df22dca6284efe4e898991d28e3085c706c972bcd7da4a27a15eb"
dependencies = [
"instant",
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018"
dependencies = [
"cfg-if",
"instant",
"libc",
"redox_syscall",
"smallvec",
"winapi",
]
[[package]] [[package]]
name = "percent-encoding" name = "percent-encoding"
version = "2.1.0" version = "2.1.0"
@ -1345,15 +1301,6 @@ dependencies = [
"rand_core", "rand_core",
] ]
[[package]]
name = "redox_syscall"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8270314b5ccceb518e7e578952f0b72b88222d02e8f77f5ecf7abbb673539041"
dependencies = [
"bitflags",
]
[[package]] [[package]]
name = "regex" name = "regex"
version = "1.4.5" version = "1.4.5"
@ -1481,12 +1428,6 @@ dependencies = [
"winapi-util", "winapi-util",
] ]
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]] [[package]]
name = "scraper" name = "scraper"
version = "0.12.0" version = "0.12.0"

View File

@ -18,7 +18,6 @@ futures = "0.3.8"
futures-util = "0.3.8" futures-util = "0.3.8"
futures-channel = "0.3.8" futures-channel = "0.3.8"
regex = "1.3.7" regex = "1.3.7"
parking_lot = "0.11.0"
structopt = "0.3.13" structopt = "0.3.13"
rpassword = "5.0.0" rpassword = "5.0.0"
rprompt = "1.0.5" rprompt = "1.0.5"

View File

@ -7,8 +7,7 @@ use futures_channel::mpsc::UnboundedSender;
use futures_util::{stream::TryStreamExt, StreamExt}; use futures_util::{stream::TryStreamExt, StreamExt};
use ignore::gitignore::Gitignore; use ignore::gitignore::Gitignore;
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle}; use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
use once_cell::sync::Lazy; use once_cell::sync::{Lazy, OnceCell};
use parking_lot::Mutex;
use reqwest::{Client, Proxy}; use reqwest::{Client, Proxy};
use scraper::{ElementRef, Html, Selector}; use scraper::{ElementRef, Html, Selector};
use serde_json::json; use serde_json::json;
@ -18,12 +17,12 @@ use tokio::task::{self, JoinHandle};
use tokio_util::io::StreamReader; use tokio_util::io::StreamReader;
use url::Url; use url::Url;
use std::{future::Future, sync::atomic::AtomicBool}; use std::future::Future;
use std::io; use std::io;
use std::path::PathBuf; use std::path::PathBuf;
use std::sync::atomic::Ordering; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::Arc; use std::sync::Arc;
use std::{collections::HashSet, default::Default, sync::atomic::AtomicUsize}; use std::collections::HashSet;
mod util; mod util;
use util::*; use util::*;
@ -35,9 +34,15 @@ static PROGRESS_BAR_ENABLED: AtomicBool = AtomicBool::new(false);
static PROGRESS_BAR: Lazy<ProgressBar> = Lazy::new(|| ProgressBar::new(0)); static PROGRESS_BAR: Lazy<ProgressBar> = Lazy::new(|| ProgressBar::new(0));
/// Global job queue /// Global job queue
static TASKS: Lazy<Mutex<Option<UnboundedSender<JoinHandle<()>>>>> = Lazy::new(Mutex::default); static TASKS: OnceCell<UnboundedSender<JoinHandle<()>>> = OnceCell::new();
static TASKS_RUNNING: Lazy<Semaphore> = Lazy::new(|| Semaphore::new(0)); static TASKS_RUNNING: Lazy<Semaphore> = Lazy::new(|| Semaphore::new(0));
/// Spawns the future `$e` as a tokio task and sends the resulting
/// `JoinHandle` through the sender stored in the global `TASKS` cell.
///
/// # Panics
///
/// Panics if `TASKS` has not been initialized yet, or if sending the handle
/// fails. The panic message states which of the two invariants was broken.
macro_rules! spawn {
    ($e:expr) => {
        TASKS
            .get()
            .expect("global job queue (TASKS) must be initialized before spawning tasks")
            .unbounded_send(task::spawn($e))
            .expect("failed to enqueue task: global job queue is closed");
    };
}
macro_rules! log { macro_rules! log {
($lvl:expr, $($t:expr),+) => { ($lvl:expr, $($t:expr),+) => {
#[allow(unused_comparisons)] // 0 <= 0 #[allow(unused_comparisons)] // 0 <= 0
@ -74,7 +79,7 @@ macro_rules! warning {
println!("Warning: {}", format!("{} {} {:?}", $msg1, $msg2, $e).bright_yellow()); println!("Warning: {}", format!("{} {} {:?}", $msg1, $msg2, $e).bright_yellow());
}; };
(format => $($e:expr),+) => { (format => $($e:expr),+) => {
println!("Warning: {}", format!($($e),+)); println!("Warning: {}", format!($($e),+).bright_yellow());
}; };
} }
@ -89,13 +94,20 @@ macro_rules! error {
#[tokio::main] #[tokio::main]
async fn main() { async fn main() {
let mut opt = Opt::from_args(); let opt = Opt::from_args();
if let Err(e) = real_main(opt).await {
error!(e);
}
}
async fn real_main(mut opt: Opt) -> Result<()> {
LOG_LEVEL.store(opt.verbose, Ordering::SeqCst);
#[cfg(windows)] #[cfg(windows)]
let _ = colored::control::set_virtual_terminal(true); let _ = colored::control::set_virtual_terminal(true);
// use UNC paths on Windows // use UNC paths on Windows
opt.output = fs::canonicalize(opt.output).await.expect("failed to canonicalize directory"); opt.output = fs::canonicalize(opt.output).await.context("failed to canonicalize output directory")?;
LOG_LEVEL.store(opt.verbose, Ordering::SeqCst); create_dir(&opt.output).await.context("failed to create output directory")?;
create_dir(&opt.output).await.expect("failed to create output directory");
// load .iliasignore file // load .iliasignore file
opt.output.push(".iliasignore"); opt.output.push(".iliasignore");
@ -104,18 +116,19 @@ async fn main() {
warning!(err); warning!(err);
} }
opt.output.pop(); opt.output.pop();
// load .iliaslogin file // load .iliaslogin file
opt.output.push(".iliaslogin"); opt.output.push(".iliaslogin");
let login = std::fs::read_to_string(&opt.output); let login = std::fs::read_to_string(&opt.output);
let (user, pass) = if let Ok(login) = login { let (user, pass) = if let Ok(login) = login {
let mut lines = login.split('\n'); let mut lines = login.split('\n');
let user = lines.next().expect("missing user in .iliaslogin"); let user = lines.next().context("missing user in .iliaslogin")?;
let pass = lines.next().expect("missing password in .iliaslogin"); let pass = lines.next().context("missing password in .iliaslogin")?;
let user = user.trim(); let user = user.trim();
let pass = pass.trim(); let pass = pass.trim();
(user.to_owned(), pass.to_owned()) (user.to_owned(), pass.to_owned())
} else { } else {
ask_user_pass(&opt).expect("credentials input") ask_user_pass(&opt).context("credentials input failed")?
}; };
opt.output.pop(); opt.output.pop();
@ -134,7 +147,7 @@ async fn main() {
} }
let ilias = Arc::new(ilias); let ilias = Arc::new(ilias);
let (tx, mut rx) = futures_channel::mpsc::unbounded::<JoinHandle<()>>(); let (tx, mut rx) = futures_channel::mpsc::unbounded::<JoinHandle<()>>();
*TASKS.lock() = Some(tx.clone()); TASKS.get_or_init(|| tx.clone());
TASKS_RUNNING.add_permits(ilias.opt.jobs); TASKS_RUNNING.add_permits(ilias.opt.jobs);
PROGRESS_BAR_ENABLED.store(atty::is(atty::Stream::Stdout), Ordering::SeqCst); PROGRESS_BAR_ENABLED.store(atty::is(atty::Stream::Stdout), Ordering::SeqCst);
if PROGRESS_BAR_ENABLED.load(Ordering::SeqCst) { if PROGRESS_BAR_ENABLED.load(Ordering::SeqCst) {
@ -145,30 +158,13 @@ async fn main() {
} }
if let Some(url) = ilias.opt.sync_url.as_ref() { if let Some(url) = ilias.opt.sync_url.as_ref() {
// TODO: this should be unified with the download logic below // TODO: this should be unified with the download logic below
let course = ilias.get_course_content(&URL::from_href(url).expect("invalid URL")).await.expect("invalid response"); let obj = Object::from_url(URL::from_href(url).expect("invalid URL"), "".to_owned(), None).expect("invalid object"); // name can be empty for first element
if let Some(s) = course.1.as_ref() { spawn!(process_gracefully(ilias.clone(), ilias.opt.output.clone(), obj));
let path = ilias.opt.output.join("course.html");
write_file_data(&path, &mut s.as_bytes()).await.expect("failed to write course page html");
}
for item in course.0 {
if let Ok(item) = item {
let ilias = Arc::clone(&ilias);
let path = ilias.opt.output.join(file_escape(item.name()));
tx.unbounded_send(task::spawn(process_gracefully(ilias, path, item))).unwrap();
}
}
} else { } else {
let desktop = ilias.personal_desktop().await.context("Failed to load personal desktop"); let desktop = ilias.personal_desktop().await.context("Failed to load personal desktop")?;
match desktop { for item in desktop.items {
Ok(desktop) => { let path = ilias.opt.output.join(file_escape(item.name()));
for item in desktop.items { tx.unbounded_send(task::spawn(process_gracefully(ilias.clone(), path, item))).unwrap();
let mut path = ilias.opt.output.clone();
path.push(file_escape(item.name()));
let ilias = Arc::clone(&ilias);
let _ = tx.unbounded_send(task::spawn(process_gracefully(ilias, path, item)));
}
},
Err(e) => error!(e),
} }
} }
while let Either::Left((task, _)) = future::select(rx.next(), future::ready(())).await { while let Either::Left((task, _)) = future::select(rx.next(), future::ready(())).await {
@ -190,12 +186,7 @@ async fn main() {
PROGRESS_BAR.set_style(ProgressStyle::default_bar().template("[{pos}/{len}] {msg}")); PROGRESS_BAR.set_style(ProgressStyle::default_bar().template("[{pos}/{len}] {msg}"));
PROGRESS_BAR.finish_with_message("done"); PROGRESS_BAR.finish_with_message("done");
} }
} Ok(())
/// Spawns the future `$e` as a tokio task and sends the resulting
/// `JoinHandle` through the sender held inside the `TASKS` lock.
/// Panics if `TASKS` currently holds `None` or if the send fails.
macro_rules! spawn {
($e:expr) => {
TASKS.lock().as_ref().unwrap().unbounded_send(task::spawn($e)).unwrap();
};
}
fn ask_user_pass(opt: &Opt) -> Result<(String, String)> { fn ask_user_pass(opt: &Opt) -> Result<(String, String)> {
@ -299,7 +290,7 @@ use crate::selectors::*;
const NO_ENTRIES: &str = "Keine Einträge"; const NO_ENTRIES: &str = "Keine Einträge";
async fn process(ilias: Arc<ILIAS>, mut path: PathBuf, obj: Object) -> Result<()> { async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
let relative_path = path.strip_prefix(&ilias.opt.output).unwrap(); let relative_path = path.strip_prefix(&ilias.opt.output).unwrap();
if PROGRESS_BAR_ENABLED.load(Ordering::SeqCst) { if PROGRESS_BAR_ENABLED.load(Ordering::SeqCst) {
PROGRESS_BAR.inc(1); PROGRESS_BAR.inc(1);
@ -461,8 +452,8 @@ async fn process(ilias: Arc<ILIAS>, mut path: PathBuf, obj: Object) -> Result<()
log!(2, "{}", json); log!(2, "{}", json);
let url = json let url = json
.pointer("/streams/0/sources/mp4/0/src") .pointer("/streams/0/sources/mp4/0/src")
.map(|x| x.as_str())
.context("video src not found")? .context("video src not found")?
.as_str()
.context("video src not string")?; .context("video src not string")?;
let meta = fs::metadata(&path).await; let meta = fs::metadata(&path).await;
if !ilias.opt.force && meta.is_ok() && ilias.opt.check_videos { if !ilias.opt.force && meta.is_ok() && ilias.opt.check_videos {
@ -497,7 +488,6 @@ async fn process(ilias: Arc<ILIAS>, mut path: PathBuf, obj: Object) -> Result<()
let html_text = data.await?.text().await?; let html_text = data.await?.text().await?;
let url = { let url = {
let html = Html::parse_document(&html_text); let html = Html::parse_document(&html_text);
//https://ilias.studium.kit.edu/ilias.php?ref_id=122&cmdClass=ilobjforumgui&frm_tt_e39_122_trows=800&cmd=showThreads&cmdNode=uf:lg&baseClass=ilrepositorygui
let thread_count_selector = html.select(&a) let thread_count_selector = html.select(&a)
.flat_map(|x| x.value().attr("href")) .flat_map(|x| x.value().attr("href"))
.find(|x| x.contains("trows=800")); .find(|x| x.contains("trows=800"));
@ -525,13 +515,9 @@ async fn process(ilias: Arc<ILIAS>, mut path: PathBuf, obj: Object) -> Result<()
} }
let cells = row.select(&td).collect::<Vec<_>>(); let cells = row.select(&td).collect::<Vec<_>>();
if cells.len() != 6 { if cells.len() != 6 {
log!( warning!(format =>
0,
"Warning: {}{} {} {}", "Warning: {}{} {} {}",
"unusual table row (".bright_yellow(), "unusual table row (", cells.len(), "cells) in", url.to_string()
cells.len().to_string().bright_yellow(),
"cells) in".bright_yellow(),
url.to_string().bright_yellow()
); );
continue; continue;
} }
@ -569,7 +555,6 @@ async fn process(ilias: Arc<ILIAS>, mut path: PathBuf, obj: Object) -> Result<()
if available_posts <= saved_posts && !ilias.opt.force { if available_posts <= saved_posts && !ilias.opt.force {
continue; continue;
} }
log!(0, "New posts in {:?}..", path);
let ilias = Arc::clone(&ilias); let ilias = Arc::clone(&ilias);
spawn!(process_gracefully(ilias, path, object)); spawn!(process_gracefully(ilias, path, object));
} }
@ -615,9 +600,10 @@ async fn process(ilias: Arc<ILIAS>, mut path: PathBuf, obj: Object) -> Result<()
.to_owned(); .to_owned();
let name = format!("{}_{}_{}.html", id, author, title.trim()); let name = format!("{}_{}_{}.html", id, author, title.trim());
let data = container.inner_html(); let data = container.inner_html();
let mut path = path.clone(); let path = path.join(file_escape(&name));
path.push(file_escape(&name)); let relative_path = relative_path.join(file_escape(&name));
spawn!(handle_gracefully(async move { spawn!(handle_gracefully(async move {
log!(0, "Writing {}", relative_path.display());
write_file_data(&path, &mut data.as_bytes()) write_file_data(&path, &mut data.as_bytes())
.await .await
.context("failed to write forum post") .context("failed to write forum post")
@ -795,9 +781,7 @@ async fn process(ilias: Arc<ILIAS>, mut path: PathBuf, obj: Object) -> Result<()
} }
let head = head.unwrap(); let head = head.unwrap();
let url = head.url().as_str(); let url = head.url().as_str();
path.push(file_escape(&name)); write_file_data(path.join(file_escape(&name)), &mut url.as_bytes()).await?;
write_file_data(&path, &mut url.as_bytes()).await?;
path.pop();
} }
} else { } else {
log!(0, "Writing {}", relative_path.to_string_lossy()); log!(0, "Writing {}", relative_path.to_string_lossy());
@ -1120,14 +1104,17 @@ impl Object {
} }
fn from_link(item: ElementRef, link: ElementRef) -> Result<Self> { fn from_link(item: ElementRef, link: ElementRef) -> Result<Self> {
let mut name = link let name = link
.text() .text()
.collect::<String>() .collect::<String>()
.replace('/', "-") .replace('/', "-")
.trim() .trim()
.to_owned(); .to_owned();
let mut url = URL::from_href(link.value().attr("href").context("link missing href")?)?; let url = URL::from_href(link.value().attr("href").context("link missing href")?)?;
Object::from_url(url, name, Some(item))
}
fn from_url(mut url: URL, mut name: String, item: Option<ElementRef>) -> Result<Self> {
if url.thr_pk.is_some() { if url.thr_pk.is_some() {
return Ok(Thread { url }); return Ok(Thread { url });
} }
@ -1173,7 +1160,7 @@ impl Object {
return Ok(Generic { name, url }); return Ok(Generic { name, url });
} else { } else {
let item_prop = Selector::parse("span.il_ItemProperty").unwrap(); let item_prop = Selector::parse("span.il_ItemProperty").unwrap();
let mut item_props = item.select(&item_prop); let mut item_props = item.context("can't construct file object without HTML object")?.select(&item_prop);
let ext = item_props.next().context("cannot find file extension")?; let ext = item_props.next().context("cannot find file extension")?;
let version = item_props let version = item_props
.nth(1) .nth(1)
@ -1296,12 +1283,3 @@ impl URL {
}) })
} }
} }
/// Characters that `file_escape` replaces (non-Windows targets).
#[cfg(not(target_os = "windows"))]
const INVALID: &[char] = &['/', '\\'];
/// Characters that `file_escape` replaces (Windows targets).
#[cfg(target_os = "windows")]
const INVALID: &[char] = &['/', '\\', ':', '<', '>', '"', '|', '?', '*'];

/// Returns a copy of `s` in which every character listed in `INVALID`
/// has been replaced by `-`; all other characters are kept unchanged.
fn file_escape(s: &str) -> String {
    s.chars()
        .map(|ch| if INVALID.contains(&ch) { '-' } else { ch })
        .collect()
}

View File

@ -4,11 +4,11 @@ use tokio::io::{AsyncRead, BufWriter};
use std::path::Path; use std::path::Path;
use super::Result; use crate::Result;
pub async fn write_file_data<R: ?Sized>(path: &Path, data: &mut R) -> Result<()> pub async fn write_file_data<R: ?Sized>(path: impl AsRef<Path>, data: &mut R) -> Result<()>
where R: AsyncRead + Unpin { where R: AsyncRead + Unpin {
let file = AsyncFile::create(&path).await.context("failed to create file")?; let file = AsyncFile::create(path.as_ref()).await.context("failed to create file")?;
let mut file = BufWriter::new(file); let mut file = BufWriter::new(file);
tokio::io::copy(data, &mut file).await.context("failed to write to file")?; tokio::io::copy(data, &mut file).await.context("failed to write to file")?;
Ok(()) Ok(())
@ -24,36 +24,11 @@ pub async fn create_dir(path: &Path) -> Result<()> {
Ok(()) Ok(())
} }
// remove once result_flattening is stable (https://github.com/rust-lang/rust/issues/70142) #[cfg(not(target_os = "windows"))]
pub trait Result2 { const INVALID: &[char] = &['/', '\\'];
type V; #[cfg(target_os = "windows")]
type E; const INVALID: &[char] = &['/', '\\', ':', '<', '>', '"', '|', '?', '*'];
type F;
fn flatten2(self) -> Result<Self::V, Self::E> pub fn file_escape(s: &str) -> String {
where s.replace(INVALID, "-")
Self::F: Into<Self::E>;
fn flatten_with<O: FnOnce(Self::F) -> Self::E>(self, op: O) -> Result<Self::V, Self::E>;
}
impl<V, E, F> Result2 for Result<Result<V, F>, E> {
type V = V;
type E = E;
type F = F;
fn flatten2(self) -> Result<Self::V, Self::E>
where
Self::F: Into<Self::E>,
{
self.flatten_with(|e| e.into())
}
fn flatten_with<O: FnOnce(Self::F) -> Self::E>(self, op: O) -> Result<Self::V, Self::E> {
match self {
Ok(Ok(v)) => Ok(v),
Ok(Err(f)) => Err(op(f)),
Err(e) => Err(e),
}
}
} }