Make content tree downloading optional

Downloading the content tree is *really* slow, so it is now opt-in via the `--content-tree` flag.
This commit is contained in:
FliegendeWurst 2020-04-24 09:46:48 +02:00
parent 97ee02756d
commit b1c148f5a9

View File

@ -161,7 +161,7 @@ impl Object {
}; };
} }
if url.target.as_ref().map(|x| x.starts_with("frm_")).unwrap_or(false) { if url.target.as_ref().map(|x| x.starts_with("frm_")).unwrap_or(false) {
// TODO: extract post link? (this codepath should only be hit when parsing the content tree) // TODO: extract post link? (however, this codepath should only be hit when parsing the content tree)
let ref_id = url.target.as_ref().unwrap().split('_').nth(1).unwrap(); let ref_id = url.target.as_ref().unwrap().split('_').nth(1).unwrap();
url.ref_id = ref_id.to_owned(); url.ref_id = ref_id.to_owned();
return Forum { return Forum {
@ -188,7 +188,7 @@ impl Object {
let target = url.target.as_ref().unwrap(); let target = url.target.as_ref().unwrap();
if !target.ends_with("download") { if !target.ends_with("download") {
// download page containing metadata // download page containing metadata
// TODO: perhaps process that? not really needed // TODO: perhaps process that? not really needed since it'll be in a folder anyway
return Generic { return Generic {
name, name,
url url
@ -213,6 +213,7 @@ impl Object {
return Forum { name, url }; return Forum { name, url };
} }
// class name is *sometimes* in CamelCase
match &*url.baseClass.to_ascii_lowercase() { match &*url.baseClass.to_ascii_lowercase() {
"ilexercisehandlergui" => ExerciseHandler { name, url }, "ilexercisehandlergui" => ExerciseHandler { name, url },
"ililwikihandlergui" => Wiki { name, url }, "ililwikihandlergui" => Wiki { name, url },
@ -378,21 +379,19 @@ impl ILIAS {
} }
async fn get_html(&self, url: &str) -> Result<Html> { async fn get_html(&self, url: &str) -> Result<Html> {
let text = if url.starts_with("http") || url.starts_with("ilias.studium.kit.edu") { let text = self.download(url).await?.text().await?;
self.client.get(url).send().await?.text().await? let html = Html::parse_document(&text);
if html.select(&alert_danger).next().is_some() {
Err("ILIAS error".into())
} else { } else {
let url = format!("{}{}", ILIAS_URL, url); Ok(html)
self.client.get(&url).send().await?.text().await? }
};
Ok(Html::parse_document(&text))
} }
async fn get_html_fragment(&self, url: &str) -> Result<Html> { async fn get_html_fragment(&self, url: &str) -> Result<Html> {
let text = self.client.get(url).send().await?.text().await?; let text = self.download(url).await?.text().await?;
let html = Html::parse_fragment(&text); let html = Html::parse_fragment(&text);
// TODO: have this in get_html too
if html.select(&alert_danger).next().is_some() { if html.select(&alert_danger).next().is_some() {
//println!("{}", text);
Err("ILIAS error".into()) Err("ILIAS error".into())
} else { } else {
Ok(html) Ok(html)
@ -406,7 +405,7 @@ impl ILIAS {
async fn get_course_content_tree(&self, ref_id: &str, cmd_node: &str) -> Result<Vec<Object>> { async fn get_course_content_tree(&self, ref_id: &str, cmd_node: &str) -> Result<Vec<Object>> {
// TODO: this magically does not return sub-folders // TODO: this magically does not return sub-folders
// opening the same url in browser does show sub-folders.. // opening the same url in browser does show sub-folders?!
let url = format!( let url = format!(
"{}ilias.php?ref_id={}&cmdClass=ilobjcoursegui&cmd=showRepTree&cmdNode={}&baseClass=ilRepositoryGUI&cmdMode=asynch&exp_cmd=getNodeAsync&node_id=exp_node_rep_exp_{}&exp_cont=il_expl2_jstree_cont_rep_exp&searchterm=", "{}ilias.php?ref_id={}&cmdClass=ilobjcoursegui&cmd=showRepTree&cmdNode={}&baseClass=ilRepositoryGUI&cmdMode=asynch&exp_cmd=getNodeAsync&node_id=exp_node_rep_exp_{}&exp_cont=il_expl2_jstree_cont_rep_exp&searchterm=",
ILIAS_URL, ref_id, cmd_node, ref_id ILIAS_URL, ref_id, cmd_node, ref_id
@ -442,12 +441,14 @@ impl ILIAS {
#[tokio::main] #[tokio::main]
async fn main() { async fn main() {
let opt = Opt::from_args(); let opt = Opt::from_args();
// need this because error handling is WIP
*PANIC_HOOK.lock() = panic::take_hook(); *PANIC_HOOK.lock() = panic::take_hook();
panic::set_hook(Box::new(|info| { panic::set_hook(Box::new(|info| {
*TASKS_RUNNING.lock() -= 1; *TASKS_RUNNING.lock() -= 1;
*TASKS_QUEUED.lock() -= 1; *TASKS_QUEUED.lock() -= 1;
PANIC_HOOK.lock()(info); PANIC_HOOK.lock()(info);
})); }));
let user = rprompt::prompt_reply_stdout("Username: ").unwrap(); let user = rprompt::prompt_reply_stdout("Username: ").unwrap();
let pass = rpassword::read_password_from_tty(Some("Password: ")).unwrap(); let pass = rpassword::read_password_from_tty(Some("Password: ")).unwrap();
let ilias = match ILIAS::login::<_, String>(opt, user, pass).await { let ilias = match ILIAS::login::<_, String>(opt, user, pass).await {
@ -457,8 +458,11 @@ async fn main() {
std::process::exit(77); std::process::exit(77);
} }
}; };
// need this to get the content tree if ilias.opt.content_tree {
let _ = ilias.client.get("https://ilias.studium.kit.edu/ilias.php?baseClass=ilRepositoryGUI&cmd=frameset&set_mode=tree&ref_id=1").send().await; // need this to get the content tree
// TODO error handling
let _ = ilias.client.get("https://ilias.studium.kit.edu/ilias.php?baseClass=ilRepositoryGUI&cmd=frameset&set_mode=tree&ref_id=1").send().await;
}
let ilias = Arc::new(ilias); let ilias = Arc::new(ilias);
let desktop = ilias.personal_desktop().await.unwrap(); let desktop = ilias.personal_desktop().await.unwrap();
for item in desktop.items { for item in desktop.items {
@ -474,8 +478,11 @@ async fn main() {
while *TASKS_QUEUED.lock() > 0 { while *TASKS_QUEUED.lock() > 0 {
tokio::time::delay_for(Duration::from_millis(500)).await; tokio::time::delay_for(Duration::from_millis(500)).await;
} }
// restore fast page loading times if ilias.opt.content_tree {
let _ = ilias.client.get("https://ilias.studium.kit.edu/ilias.php?baseClass=ilRepositoryGUI&cmd=frameset&set_mode=flat&ref_id=1").send().await; // restore fast page loading times
// TODO error handling
let _ = ilias.client.get("https://ilias.studium.kit.edu/ilias.php?baseClass=ilRepositoryGUI&cmd=frameset&set_mode=flat&ref_id=1").send().await;
}
} }
lazy_static!{ lazy_static!{
@ -539,20 +546,24 @@ fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> impl std::future::F
Err(e)?; Err(e)?;
} }
} }
let html = ilias.download(&url.url).await?.text().await?; let content = if ilias.opt.content_tree {
let cmd_node = cmd_node_regex.find(&html).ok_or::<Error>("can't find cmdNode".into())?.as_str()[8..].to_owned(); let html = ilias.download(&url.url).await?.text().await?;
let content_tree = ilias.get_course_content_tree(&url.ref_id, &cmd_node).await; let cmd_node = cmd_node_regex.find(&html).ok_or::<Error>("can't find cmdNode".into())?.as_str()[8..].to_owned();
let content = match content_tree { let content_tree = ilias.get_course_content_tree(&url.ref_id, &cmd_node).await;
Ok(tree) => tree, match content_tree {
Err(e) => { Ok(tree) => tree,
// some folders are hidden on the course page and can only be found via the RSS feed / recent activity / content tree sidebar Err(e) => {
// TODO: this is probably never the case for folders? // some folders are hidden on the course page and can only be found via the RSS feed / recent activity / content tree sidebar
if html.contains(r#"input[name="cmd[join]""#) { // TODO: this is probably never the case for folders?
return Ok(()); // ignore groups we are not in if html.contains(r#"input[name="cmd[join]""#) {
return Ok(()); // ignore groups we are not in
}
println!("Warning: {:?} falling back to incomplete course content extractor! {}", name, e.display_chain());
ilias.get_course_content(&url).await? // TODO: perhaps don't download almost the same content 3x
} }
println!("Warning: {:?} falling back to incomplete course content extractor! {}", name, e.display_chain());
ilias.get_course_content(&url).await? // TODO: perhaps don't download almost the same content 3x
} }
} else {
ilias.get_course_content(&url).await?
}; };
for item in content { for item in content {
let mut path = path.clone(); let mut path = path.clone();
@ -838,6 +849,10 @@ struct Opt {
#[structopt(short)] #[structopt(short)]
force: bool, force: bool,
/// Use content tree (slow but thorough)
#[structopt(long)]
content_tree: bool,
/// Verbose logging (print objects downloaded) /// Verbose logging (print objects downloaded)
#[structopt(short, multiple = true, parse(from_occurrences))] #[structopt(short, multiple = true, parse(from_occurrences))]
verbose: usize, verbose: usize,