mirror of
https://github.com/FliegendeWurst/KIT-ILIAS-downloader.git
synced 2024-08-28 04:04:18 +00:00
Expand individual sessions
This commit is contained in:
parent
cc7dcd6f9d
commit
ff982a5723
12
Cargo.lock
generated
12
Cargo.lock
generated
@ -7,6 +7,7 @@ name = "KIT-ILIAS-downloader"
|
|||||||
version = "0.3.6"
|
version = "0.3.6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
|
"async-recursion",
|
||||||
"atty",
|
"atty",
|
||||||
"bytes",
|
"bytes",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
@ -111,6 +112,17 @@ dependencies = [
|
|||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "async-recursion"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2cda8f4bcc10624c4e85bc66b3f452cca98cfa5ca002dc83a16aad2367641bea"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "atty"
|
name = "atty"
|
||||||
version = "0.2.14"
|
version = "0.2.14"
|
||||||
|
@ -36,3 +36,4 @@ bytes = "1.0.1"
|
|||||||
toml = "0.5.8"
|
toml = "0.5.8"
|
||||||
tempfile = "3.2.0"
|
tempfile = "3.2.0"
|
||||||
ego-tree = "0.6.2"
|
ego-tree = "0.6.2"
|
||||||
|
async-recursion = "1.0.0"
|
||||||
|
@ -5,6 +5,7 @@ use std::{collections::HashMap, error::Error as _, io::Write, sync::Arc};
|
|||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
use cookie_store::CookieStore;
|
use cookie_store::CookieStore;
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
|
use regex::Regex;
|
||||||
use reqwest::{Client, IntoUrl, Proxy, Url};
|
use reqwest::{Client, IntoUrl, Proxy, Url};
|
||||||
use reqwest_cookie_store::CookieStoreMutex;
|
use reqwest_cookie_store::CookieStoreMutex;
|
||||||
use scraper::{ElementRef, Html, Selector};
|
use scraper::{ElementRef, Html, Selector};
|
||||||
@ -267,8 +268,8 @@ impl ILIAS {
|
|||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns subfolders and the main text in a course/folder/personal desktop.
|
/// Returns subfolders, the main text in a course/folder/personal desktop and all links on the page.
|
||||||
pub async fn get_course_content(&self, url: &URL) -> Result<(Vec<Result<Object>>, Option<String>)> {
|
pub async fn get_course_content(&self, url: &URL) -> Result<(Vec<Result<Object>>, Option<String>, Vec<String>)> {
|
||||||
let html = self.get_html(&url.url).await?;
|
let html = self.get_html(&url.url).await?;
|
||||||
|
|
||||||
let main_text = if let Some(el) = html.select(&IL_CONTENT_CONTAINER).next() {
|
let main_text = if let Some(el) = html.select(&IL_CONTENT_CONTAINER).next() {
|
||||||
@ -281,7 +282,7 @@ impl ILIAS {
|
|||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
Ok((ILIAS::get_items(&html), main_text))
|
Ok((ILIAS::get_items(&html), main_text, html.select(&LINKS).flat_map(|x| x.value().attr("href").map(|x| x.to_owned())).collect()))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn get_course_content_tree(&self, ref_id: &str, cmd_node: &str) -> Result<Vec<Object>> {
|
pub async fn get_course_content_tree(&self, ref_id: &str, cmd_node: &str) -> Result<Vec<Object>> {
|
||||||
|
@ -28,11 +28,13 @@ pub async fn download(path: PathBuf, ilias: Arc<ILIAS>, url: &URL, name: &str) -
|
|||||||
return Ok(()); // ignore groups we are not in
|
return Ok(()); // ignore groups we are not in
|
||||||
}
|
}
|
||||||
warning!(name, "falling back to incomplete course content extractor!", e);
|
warning!(name, "falling back to incomplete course content extractor!", e);
|
||||||
ilias.get_course_content(&url).await? // TODO: perhaps don't download almost the same content 3x
|
let (items, main_text, _) = ilias.get_course_content(&url).await?;
|
||||||
|
(items, main_text)
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
ilias.get_course_content(&url).await?
|
let (items, main_text, _) = ilias.get_course_content(&url).await?;
|
||||||
|
(items, main_text)
|
||||||
};
|
};
|
||||||
if ilias.opt.save_ilias_pages {
|
if ilias.opt.save_ilias_pages {
|
||||||
if let Some(s) = content.1.as_ref() {
|
if let Some(s) = content.1.as_ref() {
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
use std::{collections::HashSet, path::Path, sync::Arc};
|
use std::{collections::HashSet, path::Path, sync::Arc};
|
||||||
|
|
||||||
use anyhow::{Context, Result};
|
use anyhow::{Context, Result};
|
||||||
|
use async_recursion::async_recursion;
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
|
use regex::Regex;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
process_gracefully,
|
process_gracefully,
|
||||||
@ -10,8 +13,20 @@ use crate::{
|
|||||||
|
|
||||||
use super::{ILIAS, URL};
|
use super::{ILIAS, URL};
|
||||||
|
|
||||||
|
/// Lazily compiled pattern that matches any text containing `expand=` directly
/// followed by a decimal digit.
///
/// `download` tests every link of a folder page against it to find the links
/// that expand a session, e.g.
/// `ilias.php?ref_id=1943526&expand=2602906&cmd=view&...`.
/// The pattern is unanchored, so a match anywhere in the href counts.
static EXPAND_LINK: Lazy<Regex> = Lazy::new(|| {
	let pattern = "expand=\\d";
	// The pattern is a fixed literal, so compilation can only fail on a programming error.
	Regex::new(pattern).unwrap()
});
|
||||||
|
|
||||||
|
#[async_recursion]
|
||||||
pub async fn download(path: &Path, ilias: Arc<ILIAS>, url: &URL) -> Result<()> {
|
pub async fn download(path: &Path, ilias: Arc<ILIAS>, url: &URL) -> Result<()> {
|
||||||
let content = ilias.get_course_content(&url).await?;
|
let content = ilias.get_course_content(&url).await?;
|
||||||
|
|
||||||
|
// expand all sessions
|
||||||
|
for href in content.2 {
|
||||||
|
// link format: ilias.php?ref_id=1943526&expand=2602906&cmd=view&cmdClass=ilobjfoldergui&cmdNode=x1:nk&baseClass=ilrepositorygui#lg_div_1948579_pref_1943526
|
||||||
|
if EXPAND_LINK.is_match(&href) {
|
||||||
|
return download(path, ilias, &URL::from_href(&href)?).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if ilias.opt.save_ilias_pages {
|
if ilias.opt.save_ilias_pages {
|
||||||
if let Some(s) = content.1.as_ref() {
|
if let Some(s) = content.1.as_ref() {
|
||||||
let path = path.join("folder.html");
|
let path = path.join("folder.html");
|
||||||
@ -20,6 +35,7 @@ pub async fn download(path: &Path, ilias: Arc<ILIAS>, url: &URL) -> Result<()> {
|
|||||||
.context("failed to write folder page html")?;
|
.context("failed to write folder page html")?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut names = HashSet::new();
|
let mut names = HashSet::new();
|
||||||
for item in content.0 {
|
for item in content.0 {
|
||||||
let item = item?;
|
let item = item?;
|
||||||
|
@ -14,7 +14,7 @@ impl IliasIgnore {
|
|||||||
let mut prefix = Vec::new();
|
let mut prefix = Vec::new();
|
||||||
// example scenario:
|
// example scenario:
|
||||||
// path = /KIT/ILIAS/SS 23/Next Generation Internet
|
// path = /KIT/ILIAS/SS 23/Next Generation Internet
|
||||||
// iliasignore in ILIAS/.iliasignore: prefix = SS 23/Next Generation Internet
|
// iliasignore in ILIAS/.iliasignore: prefix = SS 23/Next Generation Internet/
|
||||||
// iliasignore in Next Generation Internet/.iliasignore: prefix = ""
|
// iliasignore in Next Generation Internet/.iliasignore: prefix = ""
|
||||||
loop {
|
loop {
|
||||||
let (ignore, error) = Gitignore::new(path.join(".iliasignore"));
|
let (ignore, error) = Gitignore::new(path.join(".iliasignore"));
|
||||||
|
Loading…
Reference in New Issue
Block a user