mirror of
https://github.com/FliegendeWurst/KIT-ILIAS-downloader.git
synced 2024-08-28 04:04:18 +00:00
Make content tree downloading optional
It is *really* slow.
This commit is contained in:
parent
97ee02756d
commit
b1c148f5a9
77
src/main.rs
77
src/main.rs
@ -161,7 +161,7 @@ impl Object {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
if url.target.as_ref().map(|x| x.starts_with("frm_")).unwrap_or(false) {
|
if url.target.as_ref().map(|x| x.starts_with("frm_")).unwrap_or(false) {
|
||||||
// TODO: extract post link? (this codepath should only be hit when parsing the content tree)
|
// TODO: extract post link? (however, this codepath should only be hit when parsing the content tree)
|
||||||
let ref_id = url.target.as_ref().unwrap().split('_').nth(1).unwrap();
|
let ref_id = url.target.as_ref().unwrap().split('_').nth(1).unwrap();
|
||||||
url.ref_id = ref_id.to_owned();
|
url.ref_id = ref_id.to_owned();
|
||||||
return Forum {
|
return Forum {
|
||||||
@ -188,7 +188,7 @@ impl Object {
|
|||||||
let target = url.target.as_ref().unwrap();
|
let target = url.target.as_ref().unwrap();
|
||||||
if !target.ends_with("download") {
|
if !target.ends_with("download") {
|
||||||
// download page containing metadata
|
// download page containing metadata
|
||||||
// TODO: perhaps process that? not really needed
|
// TODO: perhaps process that? not really needed since it'll be in a folder anyway
|
||||||
return Generic {
|
return Generic {
|
||||||
name,
|
name,
|
||||||
url
|
url
|
||||||
@ -213,6 +213,7 @@ impl Object {
|
|||||||
return Forum { name, url };
|
return Forum { name, url };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// class name is *sometimes* in CamelCase
|
||||||
match &*url.baseClass.to_ascii_lowercase() {
|
match &*url.baseClass.to_ascii_lowercase() {
|
||||||
"ilexercisehandlergui" => ExerciseHandler { name, url },
|
"ilexercisehandlergui" => ExerciseHandler { name, url },
|
||||||
"ililwikihandlergui" => Wiki { name, url },
|
"ililwikihandlergui" => Wiki { name, url },
|
||||||
@ -378,21 +379,19 @@ impl ILIAS {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn get_html(&self, url: &str) -> Result<Html> {
|
async fn get_html(&self, url: &str) -> Result<Html> {
|
||||||
let text = if url.starts_with("http") || url.starts_with("ilias.studium.kit.edu") {
|
let text = self.download(url).await?.text().await?;
|
||||||
self.client.get(url).send().await?.text().await?
|
let html = Html::parse_document(&text);
|
||||||
|
if html.select(&alert_danger).next().is_some() {
|
||||||
|
Err("ILIAS error".into())
|
||||||
} else {
|
} else {
|
||||||
let url = format!("{}{}", ILIAS_URL, url);
|
Ok(html)
|
||||||
self.client.get(&url).send().await?.text().await?
|
}
|
||||||
};
|
|
||||||
Ok(Html::parse_document(&text))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_html_fragment(&self, url: &str) -> Result<Html> {
|
async fn get_html_fragment(&self, url: &str) -> Result<Html> {
|
||||||
let text = self.client.get(url).send().await?.text().await?;
|
let text = self.download(url).await?.text().await?;
|
||||||
let html = Html::parse_fragment(&text);
|
let html = Html::parse_fragment(&text);
|
||||||
// TODO: have this in get_html too
|
|
||||||
if html.select(&alert_danger).next().is_some() {
|
if html.select(&alert_danger).next().is_some() {
|
||||||
//println!("{}", text);
|
|
||||||
Err("ILIAS error".into())
|
Err("ILIAS error".into())
|
||||||
} else {
|
} else {
|
||||||
Ok(html)
|
Ok(html)
|
||||||
@ -406,7 +405,7 @@ impl ILIAS {
|
|||||||
|
|
||||||
async fn get_course_content_tree(&self, ref_id: &str, cmd_node: &str) -> Result<Vec<Object>> {
|
async fn get_course_content_tree(&self, ref_id: &str, cmd_node: &str) -> Result<Vec<Object>> {
|
||||||
// TODO: this magically does not return sub-folders
|
// TODO: this magically does not return sub-folders
|
||||||
// opening the same url in browser does show sub-folders..
|
// opening the same url in browser does show sub-folders?!
|
||||||
let url = format!(
|
let url = format!(
|
||||||
"{}ilias.php?ref_id={}&cmdClass=ilobjcoursegui&cmd=showRepTree&cmdNode={}&baseClass=ilRepositoryGUI&cmdMode=asynch&exp_cmd=getNodeAsync&node_id=exp_node_rep_exp_{}&exp_cont=il_expl2_jstree_cont_rep_exp&searchterm=",
|
"{}ilias.php?ref_id={}&cmdClass=ilobjcoursegui&cmd=showRepTree&cmdNode={}&baseClass=ilRepositoryGUI&cmdMode=asynch&exp_cmd=getNodeAsync&node_id=exp_node_rep_exp_{}&exp_cont=il_expl2_jstree_cont_rep_exp&searchterm=",
|
||||||
ILIAS_URL, ref_id, cmd_node, ref_id
|
ILIAS_URL, ref_id, cmd_node, ref_id
|
||||||
@ -442,12 +441,14 @@ impl ILIAS {
|
|||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() {
|
async fn main() {
|
||||||
let opt = Opt::from_args();
|
let opt = Opt::from_args();
|
||||||
|
// need this because error handling is WIP
|
||||||
*PANIC_HOOK.lock() = panic::take_hook();
|
*PANIC_HOOK.lock() = panic::take_hook();
|
||||||
panic::set_hook(Box::new(|info| {
|
panic::set_hook(Box::new(|info| {
|
||||||
*TASKS_RUNNING.lock() -= 1;
|
*TASKS_RUNNING.lock() -= 1;
|
||||||
*TASKS_QUEUED.lock() -= 1;
|
*TASKS_QUEUED.lock() -= 1;
|
||||||
PANIC_HOOK.lock()(info);
|
PANIC_HOOK.lock()(info);
|
||||||
}));
|
}));
|
||||||
|
|
||||||
let user = rprompt::prompt_reply_stdout("Username: ").unwrap();
|
let user = rprompt::prompt_reply_stdout("Username: ").unwrap();
|
||||||
let pass = rpassword::read_password_from_tty(Some("Password: ")).unwrap();
|
let pass = rpassword::read_password_from_tty(Some("Password: ")).unwrap();
|
||||||
let ilias = match ILIAS::login::<_, String>(opt, user, pass).await {
|
let ilias = match ILIAS::login::<_, String>(opt, user, pass).await {
|
||||||
@ -457,8 +458,11 @@ async fn main() {
|
|||||||
std::process::exit(77);
|
std::process::exit(77);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
// need this to get the content tree
|
if ilias.opt.content_tree {
|
||||||
let _ = ilias.client.get("https://ilias.studium.kit.edu/ilias.php?baseClass=ilRepositoryGUI&cmd=frameset&set_mode=tree&ref_id=1").send().await;
|
// need this to get the content tree
|
||||||
|
// TODO error handling
|
||||||
|
let _ = ilias.client.get("https://ilias.studium.kit.edu/ilias.php?baseClass=ilRepositoryGUI&cmd=frameset&set_mode=tree&ref_id=1").send().await;
|
||||||
|
}
|
||||||
let ilias = Arc::new(ilias);
|
let ilias = Arc::new(ilias);
|
||||||
let desktop = ilias.personal_desktop().await.unwrap();
|
let desktop = ilias.personal_desktop().await.unwrap();
|
||||||
for item in desktop.items {
|
for item in desktop.items {
|
||||||
@ -474,8 +478,11 @@ async fn main() {
|
|||||||
while *TASKS_QUEUED.lock() > 0 {
|
while *TASKS_QUEUED.lock() > 0 {
|
||||||
tokio::time::delay_for(Duration::from_millis(500)).await;
|
tokio::time::delay_for(Duration::from_millis(500)).await;
|
||||||
}
|
}
|
||||||
// restore fast page loading times
|
if ilias.opt.content_tree {
|
||||||
let _ = ilias.client.get("https://ilias.studium.kit.edu/ilias.php?baseClass=ilRepositoryGUI&cmd=frameset&set_mode=flat&ref_id=1").send().await;
|
// restore fast page loading times
|
||||||
|
// TODO error handling
|
||||||
|
let _ = ilias.client.get("https://ilias.studium.kit.edu/ilias.php?baseClass=ilRepositoryGUI&cmd=frameset&set_mode=flat&ref_id=1").send().await;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
lazy_static!{
|
lazy_static!{
|
||||||
@ -539,20 +546,24 @@ fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> impl std::future::F
|
|||||||
Err(e)?;
|
Err(e)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let html = ilias.download(&url.url).await?.text().await?;
|
let content = if ilias.opt.content_tree {
|
||||||
let cmd_node = cmd_node_regex.find(&html).ok_or::<Error>("can't find cmdNode".into())?.as_str()[8..].to_owned();
|
let html = ilias.download(&url.url).await?.text().await?;
|
||||||
let content_tree = ilias.get_course_content_tree(&url.ref_id, &cmd_node).await;
|
let cmd_node = cmd_node_regex.find(&html).ok_or::<Error>("can't find cmdNode".into())?.as_str()[8..].to_owned();
|
||||||
let content = match content_tree {
|
let content_tree = ilias.get_course_content_tree(&url.ref_id, &cmd_node).await;
|
||||||
Ok(tree) => tree,
|
match content_tree {
|
||||||
Err(e) => {
|
Ok(tree) => tree,
|
||||||
// some folders are hidden on the course page and can only be found via the RSS feed / recent activity / content tree sidebar
|
Err(e) => {
|
||||||
// TODO: this is probably never the case for folders?
|
// some folders are hidden on the course page and can only be found via the RSS feed / recent activity / content tree sidebar
|
||||||
if html.contains(r#"input[name="cmd[join]""#) {
|
// TODO: this is probably never the case for folders?
|
||||||
return Ok(()); // ignore groups we are not in
|
if html.contains(r#"input[name="cmd[join]""#) {
|
||||||
|
return Ok(()); // ignore groups we are not in
|
||||||
|
}
|
||||||
|
println!("Warning: {:?} falling back to incomplete course content extractor! {}", name, e.display_chain());
|
||||||
|
ilias.get_course_content(&url).await? // TODO: perhaps don't download almost the same content 3x
|
||||||
}
|
}
|
||||||
println!("Warning: {:?} falling back to incomplete course content extractor! {}", name, e.display_chain());
|
|
||||||
ilias.get_course_content(&url).await? // TODO: perhaps don't download almost the same content 3x
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
ilias.get_course_content(&url).await?
|
||||||
};
|
};
|
||||||
for item in content {
|
for item in content {
|
||||||
let mut path = path.clone();
|
let mut path = path.clone();
|
||||||
@ -825,7 +836,7 @@ struct Opt {
|
|||||||
/// Do not download files
|
/// Do not download files
|
||||||
#[structopt(short, long)]
|
#[structopt(short, long)]
|
||||||
skip_files: bool,
|
skip_files: bool,
|
||||||
|
|
||||||
/// Do not download Opencast videos
|
/// Do not download Opencast videos
|
||||||
#[structopt(short, long)]
|
#[structopt(short, long)]
|
||||||
no_videos: bool,
|
no_videos: bool,
|
||||||
@ -833,11 +844,15 @@ struct Opt {
|
|||||||
/// Download forum content
|
/// Download forum content
|
||||||
#[structopt(short = "t", long)]
|
#[structopt(short = "t", long)]
|
||||||
forum: bool,
|
forum: bool,
|
||||||
|
|
||||||
/// Re-download already present files
|
/// Re-download already present files
|
||||||
#[structopt(short)]
|
#[structopt(short)]
|
||||||
force: bool,
|
force: bool,
|
||||||
|
|
||||||
|
/// Use content tree (slow but thorough)
|
||||||
|
#[structopt(long)]
|
||||||
|
content_tree: bool,
|
||||||
|
|
||||||
/// Verbose logging (print objects downloaded)
|
/// Verbose logging (print objects downloaded)
|
||||||
#[structopt(short, multiple = true, parse(from_occurrences))]
|
#[structopt(short, multiple = true, parse(from_occurrences))]
|
||||||
verbose: usize,
|
verbose: usize,
|
||||||
|
Loading…
Reference in New Issue
Block a user