mirror of
https://github.com/FliegendeWurst/KIT-ILIAS-downloader.git
synced 2024-08-28 04:04:18 +00:00
Various bugfixes
course.html: no longer includes right column; course.html/folder.html: only downloaded if useful; correct handling of pseudonymous forums
This commit is contained in:
parent
338c97821b
commit
3887d6781f
47
src/main.rs
47
src/main.rs
@ -158,7 +158,7 @@ async fn real_main(mut opt: Opt) -> Result<()> {
|
|||||||
}
|
}
|
||||||
if let Some(url) = ilias.opt.sync_url.as_ref() {
|
if let Some(url) = ilias.opt.sync_url.as_ref() {
|
||||||
// TODO: this should be unified with the download logic below
|
// TODO: this should be unified with the download logic below
|
||||||
let obj = Object::from_url(URL::from_href(url).expect("invalid URL"), "".to_owned(), None).expect("invalid object"); // name can be empty for first element
|
let obj = Object::from_url(URL::from_href(url).context("invalid sync URL")?, "Sync URL".to_owned(), None).context("invalid sync object")?; // name can be empty for first element
|
||||||
spawn!(process_gracefully(ilias.clone(), ilias.opt.output.clone(), obj));
|
spawn!(process_gracefully(ilias.clone(), ilias.opt.output.clone(), obj));
|
||||||
} else {
|
} else {
|
||||||
let desktop = ilias.personal_desktop().await.context("Failed to load personal desktop")?;
|
let desktop = ilias.personal_desktop().await.context("Failed to load personal desktop")?;
|
||||||
@ -284,7 +284,10 @@ mod selectors {
|
|||||||
pub static cmd_node_regex: Lazy<Regex> = Lazy::new(|| Regex::new(r#"cmdNode=uf:\w\w"#).unwrap());
|
pub static cmd_node_regex: Lazy<Regex> = Lazy::new(|| Regex::new(r#"cmdNode=uf:\w\w"#).unwrap());
|
||||||
pub static image_src_regex: Lazy<Regex> = Lazy::new(|| Regex::new(r#"\./data/produktiv/mobs/mm_(\d+)/([^?]+).+"#).unwrap());
|
pub static image_src_regex: Lazy<Regex> = Lazy::new(|| Regex::new(r#"\./data/produktiv/mobs/mm_(\d+)/([^?]+).+"#).unwrap());
|
||||||
pub static XOCT_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#"(?m)<script>\s+xoctPaellaPlayer\.init\(([\s\S]+)\)\s+</script>"#).unwrap());
|
pub static XOCT_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#"(?m)<script>\s+xoctPaellaPlayer\.init\(([\s\S]+)\)\s+</script>"#).unwrap());
|
||||||
pub static il_content_container: Lazy<Selector> = Lazy::new(|| Selector::parse("#ilContentContainer").unwrap());
|
pub static il_content_container: Lazy<Selector> = Lazy::new(|| Selector::parse("#il_center_col").unwrap());
|
||||||
|
pub static item_prop: Lazy<Selector> = Lazy::new(|| Selector::parse("span.il_ItemProperty").unwrap());
|
||||||
|
pub static container_items: Lazy<Selector> = Lazy::new(|| Selector::parse("div.il_ContainerListItem").unwrap());
|
||||||
|
pub static container_item_title: Lazy<Selector> = Lazy::new(|| Selector::parse("a.il_ContainerItemTitle").unwrap());
|
||||||
}
|
}
|
||||||
use crate::selectors::*;
|
use crate::selectors::*;
|
||||||
|
|
||||||
@ -327,8 +330,8 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
ilias.get_course_content(&url).await?
|
ilias.get_course_content(&url).await?
|
||||||
};
|
};
|
||||||
if let Some(s) = content.1.as_ref() {
|
if let Some(s) = content.1.as_ref() {
|
||||||
let path = ilias.opt.output.join("course.html");
|
let path = path.join("course.html");
|
||||||
write_file_data(&path, &mut s.as_bytes()).await.expect("failed to write course page html");
|
write_file_data(&path, &mut s.as_bytes()).await.context("failed to write course page html")?;
|
||||||
}
|
}
|
||||||
for item in content.0 {
|
for item in content.0 {
|
||||||
let item = item?;
|
let item = item?;
|
||||||
@ -339,6 +342,10 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
},
|
},
|
||||||
Folder { url, .. } => {
|
Folder { url, .. } => {
|
||||||
let content = ilias.get_course_content(&url).await?;
|
let content = ilias.get_course_content(&url).await?;
|
||||||
|
if let Some(s) = content.1.as_ref() {
|
||||||
|
let path = path.join("folder.html");
|
||||||
|
write_file_data(&path, &mut s.as_bytes()).await.context("failed to write folder page html")?;
|
||||||
|
}
|
||||||
for item in content.0 {
|
for item in content.0 {
|
||||||
let item = item?;
|
let item = item?;
|
||||||
let path = path.join(file_escape(item.name()));
|
let path = path.join(file_escape(item.name()));
|
||||||
@ -464,7 +471,7 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
.await
|
.await
|
||||||
.context("HEAD request failed")?;
|
.context("HEAD request failed")?;
|
||||||
if let Some(len) = head.headers().get("content-length") {
|
if let Some(len) = head.headers().get("content-length") {
|
||||||
if meta.unwrap().len() != len.to_str()?.parse::<u64>()? {
|
if meta?.len() != len.to_str()?.parse::<u64>()? {
|
||||||
warning!(relative_path.to_string_lossy(), "was updated, consider moving the outdated file");
|
warning!(relative_path.to_string_lossy(), "was updated, consider moving the outdated file");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -585,9 +592,18 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
let author = author
|
let author = author
|
||||||
.trim()
|
.trim()
|
||||||
.split('|')
|
.split('|')
|
||||||
.nth(1)
|
.collect::<Vec<_>>();
|
||||||
.context("author data in unknown format")?
|
let author = if author.len() == 2 {
|
||||||
.trim();
|
author[0] // pseudonymous forum
|
||||||
|
} else if author.len() == 3 {
|
||||||
|
if author[1] != "Pseudonym" {
|
||||||
|
author[1]
|
||||||
|
} else {
|
||||||
|
author[0]
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return Err(anyhow!("author data in unknown format"));
|
||||||
|
}.trim();
|
||||||
let container = post
|
let container = post
|
||||||
.select(&post_container)
|
.select(&post_container)
|
||||||
.next()
|
.next()
|
||||||
@ -965,8 +981,6 @@ impl ILIAS {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn get_items(html: &Html) -> Vec<Result<Object>> {
|
fn get_items(html: &Html) -> Vec<Result<Object>> {
|
||||||
let container_items = Selector::parse("div.il_ContainerListItem").unwrap();
|
|
||||||
let container_item_title = Selector::parse("a.il_ContainerItemTitle").unwrap();
|
|
||||||
html.select(&container_items)
|
html.select(&container_items)
|
||||||
.flat_map(|item| {
|
.flat_map(|item| {
|
||||||
item.select(&container_item_title)
|
item.select(&container_item_title)
|
||||||
@ -980,7 +994,17 @@ impl ILIAS {
|
|||||||
/// Returns subfolders and the main text on the course page.
|
/// Returns subfolders and the main text on the course page.
|
||||||
async fn get_course_content(&self, url: &URL) -> Result<(Vec<Result<Object>>, Option<String>)> {
|
async fn get_course_content(&self, url: &URL) -> Result<(Vec<Result<Object>>, Option<String>)> {
|
||||||
let html = self.get_html(&url.url).await?;
|
let html = self.get_html(&url.url).await?;
|
||||||
let main_text = html.select(&il_content_container).next().map(|x| x.inner_html());
|
let main_text = if let Some(el) = html.select(&il_content_container).next() {
|
||||||
|
if !el.children().flat_map(|x| x.value().as_element()).next().map(|x|
|
||||||
|
x.attr("class").unwrap_or_default().contains("ilContainerBlock")).unwrap_or(false) {
|
||||||
|
Some(el.inner_html())
|
||||||
|
} else {
|
||||||
|
// first element is the content overview => no custom text (?)
|
||||||
|
None
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
Ok((ILIAS::get_items(&html), main_text))
|
Ok((ILIAS::get_items(&html), main_text))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1159,7 +1183,6 @@ impl Object {
|
|||||||
// download page containing metadata
|
// download page containing metadata
|
||||||
return Ok(Generic { name, url });
|
return Ok(Generic { name, url });
|
||||||
} else {
|
} else {
|
||||||
let item_prop = Selector::parse("span.il_ItemProperty").unwrap();
|
|
||||||
let mut item_props = item.context("can't construct file object without HTML object")?.select(&item_prop);
|
let mut item_props = item.context("can't construct file object without HTML object")?.select(&item_prop);
|
||||||
let ext = item_props.next().context("cannot find file extension")?;
|
let ext = item_props.next().context("cannot find file extension")?;
|
||||||
let version = item_props
|
let version = item_props
|
||||||
|
Loading…
Reference in New Issue
Block a user