diff --git a/src/cli.rs b/src/cli.rs new file mode 100644 index 0000000..96dfc7b --- /dev/null +++ b/src/cli.rs @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +use std::{path::PathBuf, sync::atomic::{AtomicBool, AtomicUsize}}; + +use anyhow::{anyhow, Context, Result}; +use colored::Colorize as _; +use indicatif::ProgressBar; +use once_cell::sync::Lazy; +use structopt::StructOpt; + +#[derive(Debug, StructOpt)] +#[structopt(name = env!("CARGO_PKG_NAME"))] +pub struct Opt { + /// Do not download files + #[structopt(short, long)] + pub skip_files: bool, + + /// Do not download Opencast videos + #[structopt(short, long)] + pub no_videos: bool, + + /// Download forum content + #[structopt(short = "t", long)] + pub forum: bool, + + /// Re-download already present files + #[structopt(short)] + pub force: bool, + + /// Use content tree (experimental) + #[structopt(long)] + pub content_tree: bool, + + /// Re-check OpenCast lectures (slow) + #[structopt(long)] + pub check_videos: bool, + + /// Verbose logging + #[structopt(short, multiple = true, parse(from_occurrences))] + pub verbose: usize, + + /// Output directory + #[structopt(short, long, parse(from_os_str))] + pub output: PathBuf, + + /// Parallel download jobs + #[structopt(short, long, default_value = "1")] + pub jobs: usize, + + /// Proxy, e.g. socks5h://127.0.0.1:1080 + #[structopt(short, long)] + pub proxy: Option, + + /// Use the system keyring + #[structopt(long)] + #[cfg(feature = "keyring-auth")] + pub keyring: bool, + + /// KIT account username + #[structopt(short = "U", long)] + pub username: Option, + + /// KIT account password + #[structopt(short = "P", long)] + pub password: Option, + + /// ILIAS page to download + #[structopt(long)] + pub sync_url: Option, + + /// Requests per minute + #[structopt(long, default_value = "8")] + pub rate: usize +} + +pub static LOG_LEVEL: AtomicUsize = AtomicUsize::new(0); +pub static PROGRESS_BAR_ENABLED: AtomicBool = AtomicBool::new(false); +pub static PROGRESS_BAR: Lazy = Lazy::new(|| ProgressBar::new(0)); + +macro_rules! log { + ($lvl:expr, $($t:expr),+) => { + #[allow(unused_comparisons)] // 0 <= 0 + if $lvl <= crate::cli::LOG_LEVEL.load(std::sync::atomic::Ordering::SeqCst) { + if crate::cli::PROGRESS_BAR_ENABLED.load(std::sync::atomic::Ordering::SeqCst) { + crate::cli::PROGRESS_BAR.println(format!($($t),+)); + } else { + println!($($t),+); + } + } + } +} + +macro_rules! info { + ($t:tt) => { + println!($t); + }; +} + +macro_rules! success { + ($t:tt) => { + println!("{}", format!($t).bright_green()); + }; +} + +macro_rules! warning { + ($e:expr) => { + println!("Warning: {}", format!("{:?}", $e).bright_yellow()); + }; + ($msg:expr, $e:expr) => { + println!("Warning: {}", format!("{} {:?}", $msg, $e).bright_yellow()); + }; + ($msg1:expr, $msg2:expr, $e:expr) => { + println!("Warning: {}", format!("{} {} {:?}", $msg1, $msg2, $e).bright_yellow()); + }; + (format => $($e:expr),+) => { + println!("Warning: {}", format!($($e),+).bright_yellow()); + }; +} + +macro_rules! error { + ($($prefix:expr),+; $e:expr) => { + println!("{}: {}", format!($($prefix),+), format!("{:?}", $e).bright_red()); + }; + ($e:expr) => { + println!("Error: {}", format!("{:?}", $e).bright_red()); + }; +} + +pub fn ask_user_pass(opt: &Opt) -> Result<(String, String)> { + let user = if let Some(username) = opt.username.as_ref() { + username.clone() + } else { + rprompt::prompt_reply_stdout("Username: ").context("username prompt")? + }; + #[cfg(feature = "keyring-auth")] + let (pass, should_store); + #[cfg(feature = "keyring-auth")] + let keyring = Lazy::new(|| keyring::Keyring::new(env!("CARGO_PKG_NAME"), &user)); + #[cfg(not(feature = "keyring-auth"))] + let pass; + cfg_if::cfg_if! { // TODO: deduplicate the logic below + if #[cfg(feature = "keyring-auth")] { + if let Some(password) = opt.password.as_ref() { + pass = password.clone(); + should_store = true; + } else if opt.keyring { + match keyring.get_password() { + Ok(password) => { + pass = password; + should_store = false; + }, + Err(e) => { + error!(e); + pass = rpassword::read_password_from_tty(Some("Password: ")).context("password prompt")?; + should_store = true; + } + } + } else { + pass = rpassword::read_password_from_tty(Some("Password: ")).context("password prompt")?; + should_store = true; + } + } else { + if let Some(password) = opt.password.as_ref() { + pass = password.clone(); + } else { + pass = rpassword::read_password_from_tty(Some("Password: ")).context("password prompt")?; + } + } + }; + #[cfg(feature = "keyring-auth")] + if should_store && opt.keyring { + keyring.set_password(&pass).map_err(|x| anyhow!(x.to_string()))?; + } + Ok((user, pass)) +} diff --git a/src/ilias.rs b/src/ilias.rs new file mode 100644 index 0000000..f68ebae --- /dev/null +++ b/src/ilias.rs @@ -0,0 +1,484 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +use std::error::Error as _; + +use anyhow::{anyhow, Context, Result}; +use colored::Colorize; +use ignore::gitignore::Gitignore; +use reqwest::{Client, IntoUrl, Proxy, Url}; +use scraper::{ElementRef, Html, Selector}; +use serde_json::json; + +use crate::{ILIAS_URL, cli::Opt, get_request_ticket, selectors::*}; + +pub struct ILIAS { + pub opt: Opt, + pub ignore: Gitignore, + // TODO: use these for re-authentication in case of session timeout/invalidation + user: String, + pass: String, + client: Client, +} + +/// Returns true if the error is caused by: +/// "http2 error: protocol error: not a result of an error" +fn error_is_http2(error: &reqwest::Error) -> bool { + error.source() // hyper::Error + .map(|x| x.downcast_ref::()) + .flatten() + .map(|x| x.reason()) + .flatten() + .map(|x| x == h2::Reason::NO_ERROR) + .unwrap_or(false) +} + +impl ILIAS { + pub async fn login(opt: Opt, user: impl Into, pass: impl Into, ignore: Gitignore) -> Result { + let user = user.into(); + let pass = pass.into(); + let mut builder = Client::builder() + .cookie_store(true) + .user_agent(concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"))); + if let Some(proxy) = opt.proxy.as_ref() { + let proxy = Proxy::all(proxy)?; + builder = builder.proxy(proxy); + } + let client = builder + // timeout is infinite by default + .build()?; + let this = ILIAS { opt, ignore, user, pass, client }; + info!("Logging into ILIAS using KIT account.."); + let session_establishment = this.client + .post("https://ilias.studium.kit.edu/Shibboleth.sso/Login") + .form(&json!({ + "sendLogin": "1", + "idp_selection": "https://idp.scc.kit.edu/idp/shibboleth", + "target": "/shib_login.php?target=", + "home_organization_selection": "Mit KIT-Account anmelden" + })) + .send().await?; + let url = session_establishment.url().clone(); + let text = session_establishment.text().await?; + let dom_sso = Html::parse_document(text.as_str()); + let csrf_token = dom_sso + .select(&Selector::parse(r#"input[name="csrf_token"]"#).unwrap()) + .next().context("no csrf token")?; + info!("Logging into Shibboleth.."); + let login_response = this.client + .post(url) + .form(&json!({ + "j_username": &this.user, + "j_password": &this.pass, + "_eventId_proceed": "", + "csrf_token": csrf_token.value().attr("value").context("no csrf token")?, + })) + .send().await? + .text().await?; + let dom = Html::parse_document(&login_response); + let saml = Selector::parse(r#"input[name="SAMLResponse"]"#).unwrap(); + let saml = dom + .select(&saml) + .next().context("no SAML response, incorrect password?")?; + let relay_state = Selector::parse(r#"input[name="RelayState"]"#).unwrap(); + let relay_state = dom.select(&relay_state).next().context("no relay state")?; + info!("Logging into ILIAS.."); + this.client + .post("https://ilias.studium.kit.edu/Shibboleth.sso/SAML2/POST") + .form(&json!({ + "SAMLResponse": saml.value().attr("value").context("no SAML value")?, + "RelayState": relay_state.value().attr("value").context("no RelayState value")? + })) + .send().await?; + success!("Logged in!"); + Ok(this) + } + + pub async fn download(&self, url: &str) -> Result { + get_request_ticket().await; + log!(2, "Downloading {}", url); + let url = if url.starts_with("http://") || url.starts_with("https://") { + url.to_owned() + } else if url.starts_with("ilias.studium.kit.edu") { + format!("https://{}", url) + } else { + format!("{}{}", ILIAS_URL, url) + }; + for attempt in 1..10 { + let result = self.client.get(url.clone()).send().await; + match result { + Ok(x) => return Ok(x), + Err(e) if attempt <= 3 && error_is_http2(&e) => { + warning!("encountered HTTP/2 NO_ERROR, retrying download.."); + continue + }, + Err(e) => return Err(e.into()) + } + } + unreachable!() + } + + pub async fn head(&self, url: U) -> Result { + get_request_ticket().await; + let url = url.into_url()?; + for attempt in 1..10 { + let result = self.client.head(url.clone()).send().await; + match result { + Ok(x) => return Ok(x), + Err(e) if attempt <= 3 && error_is_http2(&e) => { + warning!("encountered HTTP/2 NO_ERROR, retrying HEAD request.."); + continue + }, + Err(e) => return Err(e) + } + } + unreachable!() + } + + pub async fn get_html(&self, url: &str) -> Result { + let text = self.download(url).await?.text().await?; + let html = Html::parse_document(&text); + if html.select(&alert_danger).next().is_some() { + Err(anyhow!("ILIAS error")) + } else { + Ok(html) + } + } + + pub async fn get_html_fragment(&self, url: &str) -> Result { + let text = self.download(url).await?.text().await?; + let html = Html::parse_fragment(&text); + if html.select(&alert_danger).next().is_some() { + Err(anyhow!("ILIAS error")) + } else { + Ok(html) + } + } + + pub fn get_items(html: &Html) -> Vec> { + html.select(&container_items) + .flat_map(|item| { + item.select(&container_item_title) + .next() + .map(|link| Object::from_link(item, link)) + // items without links are ignored + }) + .collect() + } + + /// Returns subfolders and the main text on the course page. + pub async fn get_course_content(&self, url: &URL) -> Result<(Vec>, Option)> { + let html = self.get_html(&url.url).await?; + let main_text = if let Some(el) = html.select(&il_content_container).next() { + if !el.children().flat_map(|x| x.value().as_element()).next().map(|x| + x.attr("class").unwrap_or_default().contains("ilContainerBlock")).unwrap_or(false) + && el.inner_html().len() > 40 { + // ^ minimum length of useful content? + Some(el.inner_html()) + } else { + // first element is the content overview => no custom text (?) + None + } + } else { + None + }; + Ok((ILIAS::get_items(&html), main_text)) + } + + pub async fn personal_desktop(&self) -> Result { + let html = self.get_html("https://ilias.studium.kit.edu/ilias.php?baseClass=ilPersonalDesktopGUI&cmd=jumpToSelectedItems").await?; + let items = ILIAS::get_items(&html) + .into_iter() + .flat_map(Result::ok) + .collect(); + Ok(Dashboard { items }) + } + + pub async fn get_course_content_tree(&self, ref_id: &str, cmd_node: &str) -> Result> { + // TODO: this magically does not return sub-folders + // opening the same url in browser does show sub-folders?! + let url = format!( + "{}ilias.php?ref_id={}&cmdClass=ilobjcoursegui&cmd=showRepTree&cmdNode={}&baseClass=ilRepositoryGUI&cmdMode=asynch&exp_cmd=getNodeAsync&node_id=exp_node_rep_exp_{}&exp_cont=il_expl2_jstree_cont_rep_exp&searchterm=", + ILIAS_URL, ref_id, cmd_node, ref_id + ); + let html = self.get_html_fragment(&url).await?; + let mut items = Vec::new(); + for link in html.select(&a) { + if link.value().attr("href").is_some() { + items.push(Object::from_link(link, link)?); + } // else: disabled course + } + Ok(items) + } +} + +#[derive(Debug)] +pub struct Dashboard { + pub items: Vec, +} + +#[derive(Debug)] +pub enum Object { + Course { name: String, url: URL }, + Folder { name: String, url: URL }, + File { name: String, url: URL }, + Forum { name: String, url: URL }, + Thread { url: URL }, + Wiki { name: String, url: URL }, + ExerciseHandler { name: String, url: URL }, + Weblink { name: String, url: URL }, + Survey { name: String, url: URL }, + Presentation { name: String, url: URL }, + PluginDispatch { name: String, url: URL }, + Video { url: URL }, + Generic { name: String, url: URL }, +} + +use Object::*; + +impl Object { + pub fn name(&self) -> &str { + match self { + Course { name, .. } + | Folder { name, .. } + | File { name, .. } + | Forum { name, .. } + | Wiki { name, .. } + | Weblink { name, .. } + | Survey { name, .. } + | Presentation { name, .. } + | ExerciseHandler { name, .. } + | PluginDispatch { name, .. } + | Generic { name, .. } => &name, + Thread { url } => &url.thr_pk.as_ref().unwrap(), + Video { url } => &url.url, + } + } + + pub fn url(&self) -> &URL { + match self { + Course { url, .. } + | Folder { url, .. } + | File { url, .. } + | Forum { url, .. } + | Thread { url } + | Wiki { url, .. } + | Weblink { url, .. } + | Survey { url, .. } + | Presentation { url, .. } + | ExerciseHandler { url, .. } + | PluginDispatch { url, .. } + | Video { url } + | Generic { url, .. } => &url, + } + } + + pub fn kind(&self) -> &str { + match self { + Course { .. } => "course", + Folder { .. } => "folder", + File { .. } => "file", + Forum { .. } => "forum", + Thread { .. } => "thread", + Wiki { .. } => "wiki", + Weblink { .. } => "weblink", + Survey { .. } => "survey", + Presentation { .. } => "presentation", + ExerciseHandler { .. } => "exercise handler", + PluginDispatch { .. } => "plugin dispatch", + Video { .. } => "video", + Generic { .. } => "generic", + } + } + + pub fn is_dir(&self) -> bool { + match self { + Course { .. } + | Folder { .. } + | Forum { .. } + | Thread { .. } + | Wiki { .. } + | ExerciseHandler { .. } + | PluginDispatch { .. } => true, + _ => false, + } + } + + pub fn from_link(item: ElementRef, link: ElementRef) -> Result { + let name = link + .text() + .collect::() + .replace('/', "-") + .trim() + .to_owned(); + let url = URL::from_href(link.value().attr("href").context("link missing href")?)?; + Object::from_url(url, name, Some(item)) + } + + pub fn from_url(mut url: URL, mut name: String, item: Option) -> Result { + if url.thr_pk.is_some() { + return Ok(Thread { url }); + } + + if url + .url + .starts_with("https://ilias.studium.kit.edu/goto.php") + { + let target = url.target.as_deref().unwrap_or("NONE"); + if target.starts_with("wiki_") { + return Ok(Wiki { + name, + url, // TODO: insert ref_id here + }); + } + if target.starts_with("root_") { + // magazine link + return Ok(Generic { name, url }); + } + if target.starts_with("crs_") { + let ref_id = url.target.as_ref().unwrap().split('_').nth(1).unwrap(); + url.ref_id = ref_id.to_owned(); + return Ok(Course { name, url }); + } + if target.starts_with("frm_") { + // TODO: extract post link? (this codepath should only be hit when parsing the content tree) + let ref_id = url.target.as_ref().unwrap().split('_').nth(1).unwrap(); + url.ref_id = ref_id.to_owned(); + return Ok(Forum { name, url }); + } + if target.starts_with("lm_") { + // fancy interactive task + return Ok(Presentation { name, url }); + } + if target.starts_with("fold_") { + let ref_id = url.target.as_ref().unwrap().split('_').nth(1).unwrap(); + url.ref_id = ref_id.to_owned(); + return Ok(Folder { name, url }); + } + if target.starts_with("file_") { + if !target.ends_with("download") { + // download page containing metadata + return Ok(Generic { name, url }); + } else { + let mut item_props = item.context("can't construct file object without HTML object")?.select(&item_prop); + let ext = item_props.next().context("cannot find file extension")?; + let version = item_props + .nth(1) + .context("cannot find 3rd file metadata")? + .text() + .collect::(); + let version = version.trim(); + if let Some(v) = version.strip_prefix("Version: ") { + name += "_v"; + name += v; + } + return Ok(File { + name: format!("{}.{}", name, ext.text().collect::().trim()), + url, + }); + } + } + return Ok(Generic { name, url }); + } + + if url.cmd.as_deref() == Some("showThreads") { + return Ok(Forum { name, url }); + } + + // class name is *sometimes* in CamelCase + Ok(match &*url.baseClass.to_ascii_lowercase() { + "ilexercisehandlergui" => ExerciseHandler { name, url }, + "ililwikihandlergui" => Wiki { name, url }, + "illinkresourcehandlergui" => Weblink { name, url }, + "ilobjsurveygui" => Survey { name, url }, + "illmpresentationgui" => Presentation { name, url }, + "ilrepositorygui" => match url.cmd.as_deref() { + Some("view") | Some("render") => Folder { name, url }, + Some(_) => Generic { name, url }, + None => Course { name, url }, + }, + "ilobjplugindispatchgui" => PluginDispatch { name, url }, + _ => Generic { name, url }, + }) + } +} + +#[allow(non_snake_case)] +#[derive(Debug)] +pub struct URL { + pub url: String, + baseClass: String, + cmdClass: Option, + cmdNode: Option, + pub cmd: Option, + forwardCmd: Option, + pub thr_pk: Option, + pos_pk: Option, + pub ref_id: String, + target: Option, + file: Option, +} + +#[allow(non_snake_case)] +impl URL { + pub fn raw(url: String) -> Self { + URL { + url, + baseClass: String::new(), + cmdClass: None, + cmdNode: None, + cmd: None, + forwardCmd: None, + thr_pk: None, + pos_pk: None, + ref_id: String::new(), + target: None, + file: None, + } + } + + pub fn from_href(href: &str) -> Result { + let url = if !href.starts_with(ILIAS_URL) { + Url::parse(&format!("{}{}", ILIAS_URL, href))? + } else { + Url::parse(href)? + }; + let mut baseClass = String::new(); + let mut cmdClass = None; + let mut cmdNode = None; + let mut cmd = None; + let mut forwardCmd = None; + let mut thr_pk = None; + let mut pos_pk = None; + let mut ref_id = String::new(); + let mut target = None; + let mut file = None; + for (k, v) in url.query_pairs() { + match &*k { + "baseClass" => baseClass = v.into_owned(), + "cmdClass" => cmdClass = Some(v.into_owned()), + "cmdNode" => cmdNode = Some(v.into_owned()), + "cmd" => cmd = Some(v.into_owned()), + "forwardCmd" => forwardCmd = Some(v.into_owned()), + "thr_pk" => thr_pk = Some(v.into_owned()), + "pos_pk" => pos_pk = Some(v.into_owned()), + "ref_id" => ref_id = v.into_owned(), + "target" => target = Some(v.into_owned()), + "file" => file = Some(v.into_owned()), + _ => {}, + } + } + Ok(URL { + url: url.into(), + baseClass, + cmdClass, + cmdNode, + cmd, + forwardCmd, + thr_pk, + pos_pk, + ref_id, + target, + file, + }) + } +} diff --git a/src/main.rs b/src/main.rs index f451a7e..227b9b3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,42 +6,42 @@ use anyhow::{anyhow, Context, Result}; use colored::Colorize; use futures::future::{self, Either}; use futures_channel::mpsc::UnboundedSender; -use futures_util::{stream::TryStreamExt, StreamExt}; +use futures_util::stream::TryStreamExt; +use futures_util::StreamExt; use ignore::gitignore::Gitignore; -use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle}; +use indicatif::{ProgressDrawTarget, ProgressStyle}; use once_cell::sync::{Lazy, OnceCell}; -use reqwest::{Client, IntoUrl, Proxy}; -use scraper::{ElementRef, Html, Selector}; -use serde_json::json; +use scraper::Html; use structopt::StructOpt; use tokio::{fs, sync::Semaphore, time}; use tokio::task::{self, JoinHandle}; use tokio_util::io::StreamReader; use url::Url; -use std::error::Error as _; use std::future::Future; use std::io; use std::path::PathBuf; -use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::sync::atomic::Ordering; use std::sync::Arc; use std::collections::HashSet; +pub const ILIAS_URL: &str = "https://ilias.studium.kit.edu/"; + +#[macro_use] +mod cli; +use cli::*; +mod ilias; +use ilias::*; +use Object::*; mod util; use util::*; -const ILIAS_URL: &str = "https://ilias.studium.kit.edu/"; - -static LOG_LEVEL: AtomicUsize = AtomicUsize::new(0); -static PROGRESS_BAR_ENABLED: AtomicBool = AtomicBool::new(false); -static PROGRESS_BAR: Lazy = Lazy::new(|| ProgressBar::new(0)); - /// Global job queue static TASKS: OnceCell>> = OnceCell::new(); static TASKS_RUNNING: Lazy = Lazy::new(|| Semaphore::new(0)); static REQUEST_TICKETS: Lazy = Lazy::new(|| Semaphore::new(0)); -async fn get_request_ticket() { +pub async fn get_request_ticket() { REQUEST_TICKETS.acquire().await.unwrap().forget(); } @@ -51,55 +51,6 @@ macro_rules! spawn { }; } -macro_rules! log { - ($lvl:expr, $($t:expr),+) => { - #[allow(unused_comparisons)] // 0 <= 0 - if $lvl <= LOG_LEVEL.load(Ordering::SeqCst) { - if PROGRESS_BAR_ENABLED.load(Ordering::SeqCst) { - PROGRESS_BAR.println(format!($($t),+)); - } else { - println!($($t),+); - } - } - } -} - -macro_rules! info { - ($t:tt) => { - println!($t); - }; -} - -macro_rules! success { - ($t:tt) => { - println!("{}", format!($t).bright_green()); - }; -} - -macro_rules! warning { - ($e:expr) => { - println!("Warning: {}", format!("{:?}", $e).bright_yellow()); - }; - ($msg:expr, $e:expr) => { - println!("Warning: {}", format!("{} {:?}", $msg, $e).bright_yellow()); - }; - ($msg1:expr, $msg2:expr, $e:expr) => { - println!("Warning: {}", format!("{} {} {:?}", $msg1, $msg2, $e).bright_yellow()); - }; - (format => $($e:expr),+) => { - println!("Warning: {}", format!($($e),+).bright_yellow()); - }; -} - -macro_rules! error { - ($($prefix:expr),+; $e:expr) => { - println!("{}: {}", format!($($prefix),+), format!("{:?}", $e).bright_red()); - }; - ($e:expr) => { - println!("Error: {}", format!("{:?}", $e).bright_red()); - }; -} - #[tokio::main] async fn main() { let opt = Opt::from_args(); @@ -199,60 +150,12 @@ async fn real_main(mut opt: Opt) -> Result<()> { } if PROGRESS_BAR_ENABLED.load(Ordering::SeqCst) { PROGRESS_BAR.inc(1); - PROGRESS_BAR.set_style(ProgressStyle::default_bar().template("[{pos}/{len}] {msg}")); + PROGRESS_BAR.set_style(ProgressStyle::default_bar().template("[{pos}/{len}] {wide_msg}")); PROGRESS_BAR.finish_with_message("done"); } Ok(()) } -fn ask_user_pass(opt: &Opt) -> Result<(String, String)> { - let user = if let Some(username) = opt.username.as_ref() { - username.clone() - } else { - rprompt::prompt_reply_stdout("Username: ").context("username prompt")? - }; - #[cfg(feature = "keyring-auth")] - let (pass, should_store); - #[cfg(feature = "keyring-auth")] - let keyring = Lazy::new(|| keyring::Keyring::new(env!("CARGO_PKG_NAME"), &user)); - #[cfg(not(feature = "keyring-auth"))] - let pass; - cfg_if::cfg_if! { // TODO: deduplicate the logic below - if #[cfg(feature = "keyring-auth")] { - if let Some(password) = opt.password.as_ref() { - pass = password.clone(); - should_store = true; - } else if opt.keyring { - match keyring.get_password() { - Ok(password) => { - pass = password; - should_store = false; - }, - Err(e) => { - error!(e); - pass = rpassword::read_password_from_tty(Some("Password: ")).context("password prompt")?; - should_store = true; - } - } - } else { - pass = rpassword::read_password_from_tty(Some("Password: ")).context("password prompt")?; - should_store = true; - } - } else { - if let Some(password) = opt.password.as_ref() { - pass = password.clone(); - } else { - pass = rpassword::read_password_from_tty(Some("Password: ")).context("password prompt")?; - } - } - }; - #[cfg(feature = "keyring-auth")] - if should_store && opt.keyring { - keyring.set_password(&pass).map_err(|x| anyhow!(x.to_string()))?; - } - Ok((user, pass)) -} - // https://github.com/rust-lang/rust/issues/53690#issuecomment-418911229 #[allow(clippy::manual_async_fn)] fn process_gracefully( @@ -837,540 +740,3 @@ async fn process(ilias: Arc, path: PathBuf, obj: Object) -> Result<()> { } Ok(()) } - -#[derive(Debug, StructOpt)] -#[structopt(name = env!("CARGO_PKG_NAME"))] -struct Opt { - /// Do not download files - #[structopt(short, long)] - skip_files: bool, - - /// Do not download Opencast videos - #[structopt(short, long)] - no_videos: bool, - - /// Download forum content - #[structopt(short = "t", long)] - forum: bool, - - /// Re-download already present files - #[structopt(short)] - force: bool, - - /// Use content tree (experimental) - #[structopt(long)] - content_tree: bool, - - /// Re-check OpenCast lectures (slow) - #[structopt(long)] - check_videos: bool, - - /// Verbose logging - #[structopt(short, multiple = true, parse(from_occurrences))] - verbose: usize, - - /// Output directory - #[structopt(short, long, parse(from_os_str))] - output: PathBuf, - - /// Parallel download jobs - #[structopt(short, long, default_value = "1")] - jobs: usize, - - /// Proxy, e.g. socks5h://127.0.0.1:1080 - #[structopt(short, long)] - proxy: Option, - - /// Use the system keyring - #[structopt(long)] - #[cfg(feature = "keyring-auth")] - keyring: bool, - - /// KIT account username - #[structopt(short = "U", long)] - username: Option, - - /// KIT account password - #[structopt(short = "P", long)] - password: Option, - - /// ILIAS page to download - #[structopt(long)] - sync_url: Option, - - /// Requests per minute - #[structopt(long, default_value = "8")] - rate: usize -} - -struct ILIAS { - opt: Opt, - ignore: Gitignore, - // TODO: use these for re-authentication in case of session timeout/invalidation - user: String, - pass: String, - client: Client, -} - -/// Returns true if the error is caused by: -/// "http2 error: protocol error: not a result of an error" -fn error_is_http2(error: &reqwest::Error) -> bool { - error.source() - .map(|x| x.downcast_ref::()) - .flatten() - .map(|x| x.reason()) - .flatten() - .map(|x| x == h2::Reason::NO_ERROR) - .unwrap_or(false) -} - -impl ILIAS { - async fn login(opt: Opt, user: impl Into, pass: impl Into, ignore: Gitignore) -> Result { - let user = user.into(); - let pass = pass.into(); - let mut builder = Client::builder() - .cookie_store(true) - .user_agent(concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"))); - if let Some(proxy) = opt.proxy.as_ref() { - let proxy = Proxy::all(proxy)?; - builder = builder.proxy(proxy); - } - let client = builder - // timeout is infinite by default - .build()?; - let this = ILIAS { opt, ignore, user, pass, client }; - info!("Logging into ILIAS using KIT account.."); - let session_establishment = this.client - .post("https://ilias.studium.kit.edu/Shibboleth.sso/Login") - .form(&json!({ - "sendLogin": "1", - "idp_selection": "https://idp.scc.kit.edu/idp/shibboleth", - "target": "/shib_login.php?target=", - "home_organization_selection": "Mit KIT-Account anmelden" - })) - .send().await?; - let url = session_establishment.url().clone(); - let text = session_establishment.text().await?; - let dom_sso = Html::parse_document(text.as_str()); - let csrf_token = dom_sso - .select(&Selector::parse(r#"input[name="csrf_token"]"#).unwrap()) - .next().context("no csrf token")?; - info!("Logging into Shibboleth.."); - let login_response = this.client - .post(url) - .form(&json!({ - "j_username": &this.user, - "j_password": &this.pass, - "_eventId_proceed": "", - "csrf_token": csrf_token.value().attr("value").context("no csrf token")?, - })) - .send().await? - .text().await?; - let dom = Html::parse_document(&login_response); - let saml = Selector::parse(r#"input[name="SAMLResponse"]"#).unwrap(); - let saml = dom - .select(&saml) - .next().context("no SAML response, incorrect password?")?; - let relay_state = Selector::parse(r#"input[name="RelayState"]"#).unwrap(); - let relay_state = dom.select(&relay_state).next().context("no relay state")?; - info!("Logging into ILIAS.."); - this.client - .post("https://ilias.studium.kit.edu/Shibboleth.sso/SAML2/POST") - .form(&json!({ - "SAMLResponse": saml.value().attr("value").context("no SAML value")?, - "RelayState": relay_state.value().attr("value").context("no RelayState value")? - })) - .send().await?; - success!("Logged in!"); - Ok(this) - } - - async fn download(&self, url: &str) -> Result { - get_request_ticket().await; - log!(2, "Downloading {}", url); - let url = if url.starts_with("http://") || url.starts_with("https://") { - url.to_owned() - } else if url.starts_with("ilias.studium.kit.edu") { - format!("https://{}", url) - } else { - format!("{}{}", ILIAS_URL, url) - }; - for attempt in 1..10 { - let result = self.client.get(url.clone()).send().await; - match result { - Ok(x) => return Ok(x), - Err(e) if attempt <= 3 && error_is_http2(&e) => { - warning!("encountered HTTP/2 NO_ERROR, retrying download.."); - continue - }, - Err(e) => return Err(e.into()) - } - } - unreachable!() - } - - async fn head(&self, url: U) -> Result { - get_request_ticket().await; - let url = url.into_url()?; - for attempt in 1..10 { - let result = self.client.head(url.clone()).send().await; - match result { - Ok(x) => return Ok(x), - Err(e) if attempt <= 3 && error_is_http2(&e) => { - warning!("encountered HTTP/2 NO_ERROR, retrying HEAD request.."); - continue - }, - Err(e) => return Err(e) - } - } - unreachable!() - } - - async fn get_html(&self, url: &str) -> Result { - let text = self.download(url).await?.text().await?; - let html = Html::parse_document(&text); - if html.select(&alert_danger).next().is_some() { - Err(anyhow!("ILIAS error")) - } else { - Ok(html) - } - } - - async fn get_html_fragment(&self, url: &str) -> Result { - let text = self.download(url).await?.text().await?; - let html = Html::parse_fragment(&text); - if html.select(&alert_danger).next().is_some() { - Err(anyhow!("ILIAS error")) - } else { - Ok(html) - } - } - - fn get_items(html: &Html) -> Vec> { - html.select(&container_items) - .flat_map(|item| { - item.select(&container_item_title) - .next() - .map(|link| Object::from_link(item, link)) - // items without links are ignored - }) - .collect() - } - - /// Returns subfolders and the main text on the course page. - async fn get_course_content(&self, url: &URL) -> Result<(Vec>, Option)> { - let html = self.get_html(&url.url).await?; - let main_text = if let Some(el) = html.select(&il_content_container).next() { - if !el.children().flat_map(|x| x.value().as_element()).next().map(|x| - x.attr("class").unwrap_or_default().contains("ilContainerBlock")).unwrap_or(false) - && el.inner_html().len() > 40 { - // ^ minimum length of useful content? - Some(el.inner_html()) - } else { - // first element is the content overview => no custom text (?) - None - } - } else { - None - }; - Ok((ILIAS::get_items(&html), main_text)) - } - - async fn personal_desktop(&self) -> Result { - let html = self.get_html("https://ilias.studium.kit.edu/ilias.php?baseClass=ilPersonalDesktopGUI&cmd=jumpToSelectedItems").await?; - let items = ILIAS::get_items(&html) - .into_iter() - .flat_map(Result::ok) - .collect(); - Ok(Dashboard { items }) - } - - async fn get_course_content_tree(&self, ref_id: &str, cmd_node: &str) -> Result> { - // TODO: this magically does not return sub-folders - // opening the same url in browser does show sub-folders?! - let url = format!( - "{}ilias.php?ref_id={}&cmdClass=ilobjcoursegui&cmd=showRepTree&cmdNode={}&baseClass=ilRepositoryGUI&cmdMode=asynch&exp_cmd=getNodeAsync&node_id=exp_node_rep_exp_{}&exp_cont=il_expl2_jstree_cont_rep_exp&searchterm=", - ILIAS_URL, ref_id, cmd_node, ref_id - ); - let html = self.get_html_fragment(&url).await?; - let mut items = Vec::new(); - for link in html.select(&a) { - if link.value().attr("href").is_some() { - items.push(Object::from_link(link, link)?); - } // else: disabled course - } - Ok(items) - } -} - -#[derive(Debug)] -struct Dashboard { - items: Vec, -} - -#[derive(Debug)] -enum Object { - Course { name: String, url: URL }, - Folder { name: String, url: URL }, - File { name: String, url: URL }, - Forum { name: String, url: URL }, - Thread { url: URL }, - Wiki { name: String, url: URL }, - ExerciseHandler { name: String, url: URL }, - Weblink { name: String, url: URL }, - Survey { name: String, url: URL }, - Presentation { name: String, url: URL }, - PluginDispatch { name: String, url: URL }, - Video { url: URL }, - Generic { name: String, url: URL }, -} - -use Object::*; - -impl Object { - fn name(&self) -> &str { - match self { - Course { name, .. } - | Folder { name, .. } - | File { name, .. } - | Forum { name, .. } - | Wiki { name, .. } - | Weblink { name, .. } - | Survey { name, .. } - | Presentation { name, .. } - | ExerciseHandler { name, .. } - | PluginDispatch { name, .. } - | Generic { name, .. } => &name, - Thread { url } => &url.thr_pk.as_ref().unwrap(), - Video { url } => &url.url, - } - } - - fn url(&self) -> &URL { - match self { - Course { url, .. } - | Folder { url, .. } - | File { url, .. } - | Forum { url, .. } - | Thread { url } - | Wiki { url, .. } - | Weblink { url, .. } - | Survey { url, .. } - | Presentation { url, .. } - | ExerciseHandler { url, .. } - | PluginDispatch { url, .. } - | Video { url } - | Generic { url, .. } => &url, - } - } - - fn kind(&self) -> &str { - match self { - Course { .. } => "course", - Folder { .. } => "folder", - File { .. } => "file", - Forum { .. } => "forum", - Thread { .. } => "thread", - Wiki { .. } => "wiki", - Weblink { .. } => "weblink", - Survey { .. } => "survey", - Presentation { .. } => "presentation", - ExerciseHandler { .. } => "exercise handler", - PluginDispatch { .. } => "plugin dispatch", - Video { .. } => "video", - Generic { .. } => "generic", - } - } - - fn is_dir(&self) -> bool { - match self { - Course { .. } - | Folder { .. } - | Forum { .. } - | Thread { .. } - | Wiki { .. } - | ExerciseHandler { .. } - | PluginDispatch { .. } => true, - _ => false, - } - } - - fn from_link(item: ElementRef, link: ElementRef) -> Result { - let name = link - .text() - .collect::() - .replace('/', "-") - .trim() - .to_owned(); - let url = URL::from_href(link.value().attr("href").context("link missing href")?)?; - Object::from_url(url, name, Some(item)) - } - - fn from_url(mut url: URL, mut name: String, item: Option) -> Result { - if url.thr_pk.is_some() { - return Ok(Thread { url }); - } - - if url - .url - .starts_with("https://ilias.studium.kit.edu/goto.php") - { - let target = url.target.as_deref().unwrap_or("NONE"); - if target.starts_with("wiki_") { - return Ok(Wiki { - name, - url, // TODO: insert ref_id here - }); - } - if target.starts_with("root_") { - // magazine link - return Ok(Generic { name, url }); - } - if target.starts_with("crs_") { - let ref_id = url.target.as_ref().unwrap().split('_').nth(1).unwrap(); - url.ref_id = ref_id.to_owned(); - return Ok(Course { name, url }); - } - if target.starts_with("frm_") { - // TODO: extract post link? (this codepath should only be hit when parsing the content tree) - let ref_id = url.target.as_ref().unwrap().split('_').nth(1).unwrap(); - url.ref_id = ref_id.to_owned(); - return Ok(Forum { name, url }); - } - if target.starts_with("lm_") { - // fancy interactive task - return Ok(Presentation { name, url }); - } - if target.starts_with("fold_") { - let ref_id = url.target.as_ref().unwrap().split('_').nth(1).unwrap(); - url.ref_id = ref_id.to_owned(); - return Ok(Folder { name, url }); - } - if target.starts_with("file_") { - if !target.ends_with("download") { - // download page containing metadata - return Ok(Generic { name, url }); - } else { - let mut item_props = item.context("can't construct file object without HTML object")?.select(&item_prop); - let ext = item_props.next().context("cannot find file extension")?; - let version = item_props - .nth(1) - .context("cannot find 3rd file metadata")? - .text() - .collect::(); - let version = version.trim(); - if let Some(v) = version.strip_prefix("Version: ") { - name += "_v"; - name += v; - } - return Ok(File { - name: format!("{}.{}", name, ext.text().collect::().trim()), - url, - }); - } - } - return Ok(Generic { name, url }); - } - - if url.cmd.as_deref() == Some("showThreads") { - return Ok(Forum { name, url }); - } - - // class name is *sometimes* in CamelCase - Ok(match &*url.baseClass.to_ascii_lowercase() { - "ilexercisehandlergui" => ExerciseHandler { name, url }, - "ililwikihandlergui" => Wiki { name, url }, - "illinkresourcehandlergui" => Weblink { name, url }, - "ilobjsurveygui" => Survey { name, url }, - "illmpresentationgui" => Presentation { name, url }, - "ilrepositorygui" => match url.cmd.as_deref() { - Some("view") | Some("render") => Folder { name, url }, - Some(_) => Generic { name, url }, - None => Course { name, url }, - }, - "ilobjplugindispatchgui" => PluginDispatch { name, url }, - _ => Generic { name, url }, - }) - } -} - -#[allow(non_snake_case)] -#[derive(Debug)] -struct URL { - url: String, - baseClass: String, - cmdClass: Option, - cmdNode: Option, - cmd: Option, - forwardCmd: Option, - thr_pk: Option, - pos_pk: Option, - ref_id: String, - target: Option, - file: Option, -} - -#[allow(non_snake_case)] -impl URL { - fn raw(url: String) -> Self { - URL { - url, - baseClass: String::new(), - cmdClass: None, - cmdNode: None, - cmd: None, - forwardCmd: None, - thr_pk: None, - pos_pk: None, - ref_id: String::new(), - target: None, - file: None, - } - } - - fn from_href(href: &str) -> Result { - let url = if !href.starts_with(ILIAS_URL) { - Url::parse(&format!("{}{}", ILIAS_URL, href))? - } else { - Url::parse(href)? - }; - let mut baseClass = String::new(); - let mut cmdClass = None; - let mut cmdNode = None; - let mut cmd = None; - let mut forwardCmd = None; - let mut thr_pk = None; - let mut pos_pk = None; - let mut ref_id = String::new(); - let mut target = None; - let mut file = None; - for (k, v) in url.query_pairs() { - match &*k { - "baseClass" => baseClass = v.into_owned(), - "cmdClass" => cmdClass = Some(v.into_owned()), - "cmdNode" => cmdNode = Some(v.into_owned()), - "cmd" => cmd = Some(v.into_owned()), - "forwardCmd" => forwardCmd = Some(v.into_owned()), - "thr_pk" => thr_pk = Some(v.into_owned()), - "pos_pk" => pos_pk = Some(v.into_owned()), - "ref_id" => ref_id = v.into_owned(), - "target" => target = Some(v.into_owned()), - "file" => file = Some(v.into_owned()), - _ => {}, - } - } - Ok(URL { - url: url.into(), - baseClass, - cmdClass, - cmdNode, - cmd, - forwardCmd, - thr_pk, - pos_pk, - ref_id, - target, - file, - }) - } -}