diff --git a/Cargo.lock b/Cargo.lock index 14ea6f0..c64c649 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7,6 +7,7 @@ name = "KIT-ILIAS-downloader" version = "0.3.6" dependencies = [ "anyhow", + "async-recursion", "atty", "bytes", "cfg-if", @@ -111,6 +112,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "async-recursion" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cda8f4bcc10624c4e85bc66b3f452cca98cfa5ca002dc83a16aad2367641bea" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "atty" version = "0.2.14" diff --git a/Cargo.toml b/Cargo.toml index 03e549c..b37e7db 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,3 +36,4 @@ bytes = "1.0.1" toml = "0.5.8" tempfile = "3.2.0" ego-tree = "0.6.2" +async-recursion = "1.0.0" diff --git a/src/ilias.rs b/src/ilias.rs index 48761dd..78b9e3b 100644 --- a/src/ilias.rs +++ b/src/ilias.rs @@ -5,6 +5,7 @@ use std::{collections::HashMap, error::Error as _, io::Write, sync::Arc}; use anyhow::{anyhow, Context, Result}; use cookie_store::CookieStore; use once_cell::sync::Lazy; +use regex::Regex; use reqwest::{Client, IntoUrl, Proxy, Url}; use reqwest_cookie_store::CookieStoreMutex; use scraper::{ElementRef, Html, Selector}; @@ -267,8 +268,8 @@ impl ILIAS { .collect() } - /// Returns subfolders and the main text in a course/folder/personal desktop. - pub async fn get_course_content(&self, url: &URL) -> Result<(Vec>, Option)> { + /// Returns subfolders, the main text in a course/folder/personal desktop and all links on the page. + pub async fn get_course_content(&self, url: &URL) -> Result<(Vec>, Option, Vec)> { let html = self.get_html(&url.url).await?; let main_text = if let Some(el) = html.select(&IL_CONTENT_CONTAINER).next() { @@ -281,7 +282,7 @@ impl ILIAS { } else { None }; - Ok((ILIAS::get_items(&html), main_text)) + Ok((ILIAS::get_items(&html), main_text, html.select(&LINKS).flat_map(|x| x.value().attr("href").map(|x| x.to_owned())).collect())) } pub async fn get_course_content_tree(&self, ref_id: &str, cmd_node: &str) -> Result> { diff --git a/src/ilias/course.rs b/src/ilias/course.rs index 03d2298..76c0f9f 100644 --- a/src/ilias/course.rs +++ b/src/ilias/course.rs @@ -28,11 +28,13 @@ pub async fn download(path: PathBuf, ilias: Arc, url: &URL, name: &str) - return Ok(()); // ignore groups we are not in } warning!(name, "falling back to incomplete course content extractor!", e); - ilias.get_course_content(&url).await? // TODO: perhaps don't download almost the same content 3x + let (items, main_text, _) = ilias.get_course_content(&url).await?; + (items, main_text) }, } } else { - ilias.get_course_content(&url).await? + let (items, main_text, _) = ilias.get_course_content(&url).await?; + (items, main_text) }; if ilias.opt.save_ilias_pages { if let Some(s) = content.1.as_ref() { diff --git a/src/ilias/folder.rs b/src/ilias/folder.rs index f977fae..898ecc5 100644 --- a/src/ilias/folder.rs +++ b/src/ilias/folder.rs @@ -1,6 +1,9 @@ use std::{collections::HashSet, path::Path, sync::Arc}; use anyhow::{Context, Result}; +use async_recursion::async_recursion; +use once_cell::sync::Lazy; +use regex::Regex; use crate::{ process_gracefully, @@ -10,8 +13,20 @@ use crate::{ use super::{ILIAS, URL}; +static EXPAND_LINK: Lazy = Lazy::new(|| Regex::new("expand=\\d").unwrap()); + +#[async_recursion] pub async fn download(path: &Path, ilias: Arc, url: &URL) -> Result<()> { let content = ilias.get_course_content(&url).await?; + + // expand all sessions + for href in content.2 { + // link format: ilias.php?ref_id=1943526&expand=2602906&cmd=view&cmdClass=ilobjfoldergui&cmdNode=x1:nk&baseClass=ilrepositorygui#lg_div_1948579_pref_1943526 + if EXPAND_LINK.is_match(&href) { + return download(path, ilias, &URL::from_href(&href)?).await; + } + } + if ilias.opt.save_ilias_pages { if let Some(s) = content.1.as_ref() { let path = path.join("folder.html"); @@ -20,6 +35,7 @@ pub async fn download(path: &Path, ilias: Arc, url: &URL) -> Result<()> { .context("failed to write folder page html")?; } } + let mut names = HashSet::new(); for item in content.0 { let item = item?; diff --git a/src/iliasignore.rs b/src/iliasignore.rs index 65efbf4..eaf5129 100644 --- a/src/iliasignore.rs +++ b/src/iliasignore.rs @@ -14,7 +14,7 @@ impl IliasIgnore { let mut prefix = Vec::new(); // example scenario: // path = /KIT/ILIAS/SS 23/Next Generation Internet - // iliasignore in ILIAS/.iliasignore: prefix = SS 23/Next Generation Internet + // iliasignore in ILIAS/.iliasignore: prefix = SS 23/Next Generation Internet/ // iliasignore in Next Generation Internet/.iliasignore: prefix = "" loop { let (ignore, error) = Gitignore::new(path.join(".iliasignore"));