mirror of https://github.com/FliegendeWurst/KIT-ILIAS-downloader.git
synced 2024-08-28 04:04:18 +00:00

Expand individual sessions

commit ff982a5723 (parent cc7dcd6f9d)

Cargo.lock (generated, 12 lines changed)
@@ -7,6 +7,7 @@ name = "KIT-ILIAS-downloader"
 version = "0.3.6"
 dependencies = [
  "anyhow",
+ "async-recursion",
  "atty",
  "bytes",
  "cfg-if",
@@ -111,6 +112,17 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "async-recursion"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2cda8f4bcc10624c4e85bc66b3f452cca98cfa5ca002dc83a16aad2367641bea"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "atty"
 version = "0.2.14"

@@ -36,3 +36,4 @@ bytes = "1.0.1"
 toml = "0.5.8"
 tempfile = "3.2.0"
 ego-tree = "0.6.2"
+async-recursion = "1.0.0"
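
The async-recursion crate is pulled in because the folder download further below calls itself from an async fn, and Rust rejects directly recursive async fns (the generated future type would contain itself). The #[async_recursion] attribute rewrites the function to return a boxed future. A minimal sketch of the pattern, not taken from this repository (count_down is made up; tokio, which the project already depends on, stands in as the executor):

use async_recursion::async_recursion;

// Without the attribute this would not compile: the future of a recursive
// `async fn` would have an infinitely sized type. The macro boxes it.
#[async_recursion]
async fn count_down(n: u32) {
	if n > 0 {
		count_down(n - 1).await;
	}
}

#[tokio::main]
async fn main() {
	count_down(3).await;
}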

@@ -5,6 +5,7 @@ use std::{collections::HashMap, error::Error as _, io::Write, sync::Arc};
 use anyhow::{anyhow, Context, Result};
 use cookie_store::CookieStore;
 use once_cell::sync::Lazy;
 use regex::Regex;
 use reqwest::{Client, IntoUrl, Proxy, Url};
 use reqwest_cookie_store::CookieStoreMutex;
 use scraper::{ElementRef, Html, Selector};

@@ -267,8 +268,8 @@ impl ILIAS {
 			.collect()
 	}
 
-	/// Returns subfolders and the main text in a course/folder/personal desktop.
-	pub async fn get_course_content(&self, url: &URL) -> Result<(Vec<Result<Object>>, Option<String>)> {
+	/// Returns subfolders, the main text in a course/folder/personal desktop and all links on the page.
+	pub async fn get_course_content(&self, url: &URL) -> Result<(Vec<Result<Object>>, Option<String>, Vec<String>)> {
 		let html = self.get_html(&url.url).await?;
 
 		let main_text = if let Some(el) = html.select(&IL_CONTENT_CONTAINER).next() {

@@ -281,7 +282,7 @@ impl ILIAS {
 		} else {
 			None
 		};
-		Ok((ILIAS::get_items(&html), main_text))
+		Ok((ILIAS::get_items(&html), main_text, html.select(&LINKS).flat_map(|x| x.value().attr("href").map(|x| x.to_owned())).collect()))
 	}
 
 	pub async fn get_course_content_tree(&self, ref_id: &str, cmd_node: &str) -> Result<Vec<Object>> {
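
The hunks above widen get_course_content: besides the child objects and the main page text, it now also returns every href found on the page, which the folder downloader uses to find "expand" links. A standalone sketch of the same href collection with the scraper crate (the repository's LINKS selector is not shown in this diff; the "a" selector below is an assumption):

use scraper::{Html, Selector};

// Collect the href attribute of every matching element on a page.
fn collect_hrefs(page: &str) -> Vec<String> {
	let html = Html::parse_document(page);
	// Assumption: LINKS selects anchor tags.
	let links = Selector::parse("a").unwrap();
	html.select(&links)
		.flat_map(|el| el.value().attr("href").map(str::to_owned))
		.collect()
}

fn main() {
	let page = r#"<html><body><a href="ilias.php?ref_id=1943526&expand=2602906&cmd=view">expand</a></body></html>"#;
	println!("{:?}", collect_hrefs(page));
}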

@@ -28,11 +28,13 @@ pub async fn download(path: PathBuf, ilias: Arc<ILIAS>, url: &URL, name: &str) -
 					return Ok(()); // ignore groups we are not in
 				}
 				warning!(name, "falling back to incomplete course content extractor!", e);
-				ilias.get_course_content(&url).await? // TODO: perhaps don't download almost the same content 3x
+				let (items, main_text, _) = ilias.get_course_content(&url).await?;
+				(items, main_text)
 			},
 		}
 	} else {
-		ilias.get_course_content(&url).await?
+		let (items, main_text, _) = ilias.get_course_content(&url).await?;
+		(items, main_text)
 	};
 	if ilias.opt.save_ilias_pages {
 		if let Some(s) = content.1.as_ref() {
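
Both call sites in this hunk previously returned get_course_content's two-element tuple directly; with the extra link list they now destructure and drop the third element so the surrounding content tuple keeps its old shape. The shape of that adaptation, with a stub standing in for the real client call (everything below is illustrative, not the repository's code):

// Stub with the same return shape as the new get_course_content.
async fn get_course_content_stub() -> (Vec<String>, Option<String>, Vec<String>) {
	(Vec::new(), None, Vec::new())
}

async fn caller() -> (Vec<String>, Option<String>) {
	// Old callers keep a two-element tuple by discarding the links with `_`.
	let (items, main_text, _) = get_course_content_stub().await;
	(items, main_text)
}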

@@ -1,6 +1,9 @@
 use std::{collections::HashSet, path::Path, sync::Arc};
 
 use anyhow::{Context, Result};
+use async_recursion::async_recursion;
+use once_cell::sync::Lazy;
+use regex::Regex;
 
 use crate::{
 	process_gracefully,

@@ -10,8 +13,20 @@ use crate::{
 
 use super::{ILIAS, URL};
 
+static EXPAND_LINK: Lazy<Regex> = Lazy::new(|| Regex::new("expand=\\d").unwrap());
+
+#[async_recursion]
 pub async fn download(path: &Path, ilias: Arc<ILIAS>, url: &URL) -> Result<()> {
 	let content = ilias.get_course_content(&url).await?;
+
+	// expand all sessions
+	for href in content.2 {
+		// link format: ilias.php?ref_id=1943526&expand=2602906&cmd=view&cmdClass=ilobjfoldergui&cmdNode=x1:nk&baseClass=ilrepositorygui#lg_div_1948579_pref_1943526
+		if EXPAND_LINK.is_match(&href) {
+			return download(path, ilias, &URL::from_href(&href)?).await;
+		}
+	}
+
 	if ilias.opt.save_ilias_pages {
 		if let Some(s) = content.1.as_ref() {
 			let path = path.join("folder.html");

@@ -20,6 +35,7 @@ pub async fn download(path: &Path, ilias: Arc<ILIAS>, url: &URL) -> Result<()> {
 				.context("failed to write folder page html")?;
 		}
 	}
+
 	let mut names = HashSet::new();
 	for item in content.0 {
 		let item = item?;
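
The new loop scans every link that get_course_content now returns; as soon as one contains an expand=<number> parameter, the folder download restarts itself (hence #[async_recursion]) on that link's URL, so the re-fetched page shows one more session expanded, and this repeats until no matching link is left. A small standalone check of the EXPAND_LINK pattern against an ILIAS-style link (the URLs are example values taken from the comment in the diff):

use regex::Regex;

fn main() {
	// Same pattern as EXPAND_LINK: an "expand=" parameter followed by a digit.
	let expand_link = Regex::new("expand=\\d").unwrap();
	let href = "ilias.php?ref_id=1943526&expand=2602906&cmd=view&cmdClass=ilobjfoldergui";
	assert!(expand_link.is_match(href));
	// Links without such a parameter do not trigger another fetch.
	assert!(!expand_link.is_match("ilias.php?ref_id=1943526&cmd=view"));
}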

@@ -14,7 +14,7 @@ impl IliasIgnore {
 		let mut prefix = Vec::new();
 		// example scenario:
 		// path = /KIT/ILIAS/SS 23/Next Generation Internet
-		// iliasignore in ILIAS/.iliasignore: prefix = SS 23/Next Generation Internet
+		// iliasignore in ILIAS/.iliasignore: prefix = SS 23/Next Generation Internet/
 		// iliasignore in Next Generation Internet/.iliasignore: prefix = ""
 		loop {
 			let (ignore, error) = Gitignore::new(path.join(".iliasignore"));
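
The corrected comment shows that a prefix collected from a parent directory ends with a slash, presumably so that prefix and entry name can be concatenated directly into a path relative to the directory holding that .iliasignore file, which is how gitignore-style matching is anchored. A minimal sketch of such a match with the ignore crate, which provides the Gitignore type used above (file name and pattern are made up):

use ignore::gitignore::Gitignore;

fn main() {
	// Hypothetical: ILIAS/.iliasignore contains a pattern such as
	// "SS 23/Next Generation Internet/*.mp4".
	let (ignore, error) = Gitignore::new("ILIAS/.iliasignore");
	if let Some(error) = error {
		eprintln!("problem reading ignore file: {}", error);
	}
	// Paths are matched relative to the directory containing the ignore file,
	// hence the "SS 23/Next Generation Internet/" prefix in the comment above.
	let candidate = "SS 23/Next Generation Internet/lecture01.mp4";
	println!("ignored: {}", ignore.matched(candidate, false).is_ignore());
}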