Consistent code formatting

FliegendeWurst 2021-05-28 16:02:05 +02:00
parent 5676476765
commit 5fb2faabfd
5 changed files with 140 additions and 177 deletions

rustfmt.toml (new file, +3)

@@ -0,0 +1,3 @@
+hard_tabs = true
+match_block_trailing_comma = true
+max_width = 145
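
Aside (not part of the commit): these three options make `cargo fmt` indent with hard tabs, keep the trailing comma after block-bodied match arms, and allow lines up to 145 columns instead of rustfmt's default 100. A minimal sketch of code as these settings format it:

// Illustration only: formatted the way the rustfmt.toml above would have it
// (tab indentation, trailing comma kept after the block-bodied arm).
fn describe(n: i32) -> &'static str {
	match n {
		0 => "zero",
		n if n > 0 => {
			// block-bodied arm: rustfmt keeps the comma after the closing brace
			"positive"
		},
		_ => "negative",
	}
}

fn main() {
	println!("{}", describe(42));
}
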

(next changed file)

@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-3.0-or-later
-use std::{path::PathBuf, sync::atomic::{AtomicBool, AtomicUsize}};
+use std::path::PathBuf;
+use std::sync::atomic::{AtomicBool, AtomicUsize};
 #[cfg(feature = "keyring-auth")]
 use anyhow::anyhow;
@@ -73,7 +74,11 @@ pub struct Opt {
 /// Requests per minute
 #[structopt(long, default_value = "8")]
-pub rate: usize
+pub rate: usize,
+/// Attempt to re-use session cookies
+#[structopt(long)]
+pub keep_session: bool,
 }
 pub static LOG_LEVEL: AtomicUsize = AtomicUsize::new(0);

(next changed file)

@@ -9,7 +9,7 @@ use reqwest::{Client, IntoUrl, Proxy, Url};
 use scraper::{ElementRef, Html, Selector};
 use serde_json::json;
-use crate::{ILIAS_URL, cli::Opt, get_request_ticket, selectors::*};
+use crate::{cli::Opt, get_request_ticket, selectors::*, ILIAS_URL};
 pub struct ILIAS {
 pub opt: Opt,
@@ -23,8 +23,9 @@ pub struct ILIAS {
 /// Returns true if the error is caused by:
 /// "http2 error: protocol error: not a result of an error"
 fn error_is_http2(error: &reqwest::Error) -> bool {
-error.source() // hyper::Error
-.map(|x| x.source()) // -> h2::Error
+error
+.source() // hyper::Error
+.map(|x| x.source()) // h2::Error
 .flatten()
 .map(|x| x.downcast_ref::<h2::Error>())
 .flatten()
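
Aside (not part of the diff): error_is_http2 walks the std::error::Error::source() chain from the reqwest error down to the underlying h2::Error and downcasts it. A minimal standalone sketch of the same source-chain and downcast pattern, using two made-up error types in place of reqwest/hyper/h2:

use std::error::Error;
use std::fmt;

// Hypothetical innermost cause (stands in for h2::Error).
#[derive(Debug)]
struct Inner;
impl fmt::Display for Inner {
	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
		write!(f, "inner cause")
	}
}
impl Error for Inner {}

// Hypothetical wrapper (stands in for the reqwest/hyper layer).
#[derive(Debug)]
struct Outer(Inner);
impl fmt::Display for Outer {
	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
		write!(f, "outer error")
	}
}
impl Error for Outer {
	fn source(&self) -> Option<&(dyn Error + 'static)> {
		Some(&self.0)
	}
}

fn main() {
	let err = Outer(Inner);
	// step one level down the source chain, then downcast to the concrete type
	let is_inner = err.source().and_then(|e| e.downcast_ref::<Inner>()).is_some();
	println!("{}", is_inner); // prints "true"
}
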
@@ -48,9 +49,16 @@ impl ILIAS {
 let client = builder
 // timeout is infinite by default
 .build()?;
-let this = ILIAS { opt, ignore, user, pass, client };
+let this = ILIAS {
+opt,
+ignore,
+user,
+pass,
+client,
+};
 info!("Logging into ILIAS using KIT account..");
-let session_establishment = this.client
+let session_establishment = this
+.client
 .post("https://ilias.studium.kit.edu/Shibboleth.sso/Login")
 .form(&json!({
 "sendLogin": "1",
@@ -58,29 +66,33 @@ impl ILIAS {
 "target": "/shib_login.php?target=",
 "home_organization_selection": "Mit KIT-Account anmelden"
 }))
-.send().await?;
+.send()
+.await?;
 let url = session_establishment.url().clone();
 let text = session_establishment.text().await?;
 let dom_sso = Html::parse_document(text.as_str());
 let csrf_token = dom_sso
 .select(&Selector::parse(r#"input[name="csrf_token"]"#).unwrap())
-.next().context("no csrf token")?;
+.next()
+.context("no CSRF token found")?
+.value().attr("value").context("no CSRF token value")?;
 info!("Logging into Shibboleth..");
-let login_response = this.client
+let login_response = this
+.client
 .post(url)
 .form(&json!({
 "j_username": &this.user,
 "j_password": &this.pass,
 "_eventId_proceed": "",
-"csrf_token": csrf_token.value().attr("value").context("no csrf token")?,
+"csrf_token": csrf_token,
 }))
-.send().await?
-.text().await?;
+.send()
+.await?
+.text()
+.await?;
 let dom = Html::parse_document(&login_response);
 let saml = Selector::parse(r#"input[name="SAMLResponse"]"#).unwrap();
-let saml = dom
-.select(&saml)
-.next().context("no SAML response, incorrect password?")?;
+let saml = dom.select(&saml).next().context("no SAML response, incorrect password?")?;
 let relay_state = Selector::parse(r#"input[name="RelayState"]"#).unwrap();
 let relay_state = dom.select(&relay_state).next().context("no relay state")?;
 info!("Logging into ILIAS..");
@@ -90,7 +102,8 @@ impl ILIAS {
 "SAMLResponse": saml.value().attr("value").context("no SAML value")?,
 "RelayState": relay_state.value().attr("value").context("no RelayState value")?
 }))
-.send().await?;
+.send()
+.await?;
 success!("Logged in!");
 Ok(this)
 }
@@ -111,9 +124,9 @@ impl ILIAS {
 Ok(x) => return Ok(x),
 Err(e) if attempt <= 3 && error_is_http2(&e) => {
 warning!(1; "encountered HTTP/2 NO_ERROR, retrying download..");
-continue
+continue;
 },
-Err(e) => return Err(e.into())
+Err(e) => return Err(e.into()),
 }
 }
 unreachable!()
@@ -128,9 +141,9 @@ impl ILIAS {
 Ok(x) => return Ok(x),
 Err(e) if attempt <= 3 && error_is_http2(&e) => {
 warning!(1; "encountered HTTP/2 NO_ERROR, retrying HEAD request..");
-continue
+continue;
 },
-Err(e) => return Err(e)
+Err(e) => return Err(e),
 }
 }
 unreachable!()
@@ -159,9 +172,7 @@ impl ILIAS {
 pub fn get_items(html: &Html) -> Vec<Result<Object>> {
 html.select(&container_items)
 .flat_map(|item| {
-item.select(&container_item_title)
-.next()
-.map(|link| Object::from_link(item, link))
+item.select(&container_item_title).next().map(|link| Object::from_link(item, link))
 // items without links are ignored
 })
 .collect()
@@ -172,11 +183,14 @@ impl ILIAS {
 let html = self.get_html(&url.url).await?;
 let main_text = if let Some(el) = html.select(&il_content_container).next() {
-if !el.children().flat_map(|x| x.value().as_element()).next()
-.map(|x| x.attr("class").unwrap_or_default()
-.contains("ilContainerBlock")).unwrap_or(false)
-&& el.inner_html().len() > 40 {
+if !el
+.children()
+.flat_map(|x| x.value().as_element())
+.next()
+.map(|x| x.attr("class").unwrap_or_default().contains("ilContainerBlock"))
+.unwrap_or(false)
+&& el.inner_html().len() > 40
+{
 // ^ minimum length of useful content?
 Some(el.inner_html())
 } else {
@@ -198,7 +212,7 @@ impl ILIAS {
 );
 let html = self.get_html_fragment(&url).await?;
 let mut items = Vec::new();
-for link in html.select(&a) {
+for link in html.select(&LINKS) {
 if link.value().attr("href").is_some() {
 items.push(Object::from_link(link, link)?);
 } // else: disabled course
@@ -243,7 +257,7 @@ impl Object {
 | Generic { name, .. } => &name,
 Thread { url } => &url.thr_pk.as_ref().unwrap(),
 Video { url } => &url.url,
-PersonalDesktop { url } => url.cmd.as_ref().unwrap()
+PersonalDesktop { .. } => panic!("name of personal desktop requested (this should never happen)"),
 }
 }
@@ -286,25 +300,18 @@ impl Object {
 }
 pub fn is_dir(&self) -> bool {
-matches!(self,
+matches!(
+self,
 Course { .. }
-| Folder { .. }
-| PersonalDesktop { .. }
-| Forum { .. }
-| Thread { .. }
-| Wiki { .. }
-| ExerciseHandler { .. }
+| Folder { .. } | PersonalDesktop { .. }
+| Forum { .. } | Thread { .. }
+| Wiki { .. } | ExerciseHandler { .. }
 | PluginDispatch { .. }
 )
 }
 pub fn from_link(item: ElementRef, link: ElementRef) -> Result<Self> {
-let name = link
-.text()
-.collect::<String>()
-.replace('/', "-")
-.trim()
-.to_owned();
+let name = link.text().collect::<String>().replace('/', "-").trim().to_owned();
 let url = URL::from_href(link.value().attr("href").context("link missing href")?)?;
 Object::from_url(url, name, Some(item))
 }
@@ -314,10 +321,7 @@ impl Object {
 return Ok(Thread { url });
 }
-if url
-.url
-.starts_with("https://ilias.studium.kit.edu/goto.php")
-{
+if url.url.starts_with("https://ilias.studium.kit.edu/goto.php") {
 let target = url.target.as_deref().unwrap_or("NONE");
 if target.starts_with("wiki_") {
 return Ok(Wiki {
@@ -356,11 +360,7 @@ impl Object {
 } else {
 let mut item_props = item.context("can't construct file object without HTML object")?.select(&item_prop);
 let ext = item_props.next().context("cannot find file extension")?;
-let version = item_props
-.nth(1)
-.context("cannot find 3rd file metadata")?
-.text()
-.collect::<String>();
+let version = item_props.nth(1).context("cannot find 3rd file metadata")?.text().collect::<String>();
 let version = version.trim();
 if let Some(v) = version.strip_prefix("Version: ") {
 name += "_v";

(next changed file)

@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-3.0-or-later
-#![allow(clippy::comparison_to_empty, clippy::upper_case_acronyms)]
+#![allow(clippy::upper_case_acronyms)]
 use anyhow::{anyhow, Context, Result};
 use colored::Colorize;
@@ -13,17 +13,17 @@ use indicatif::{ProgressDrawTarget, ProgressStyle};
 use once_cell::sync::{Lazy, OnceCell};
 use scraper::Html;
 use structopt::StructOpt;
-use tokio::{fs, sync::Semaphore, time};
 use tokio::task::{self, JoinHandle};
+use tokio::{fs, sync::Semaphore, time};
 use tokio_util::io::StreamReader;
 use url::Url;
+use std::collections::HashSet;
 use std::future::Future;
 use std::io;
 use std::path::PathBuf;
 use std::sync::atomic::Ordering;
 use std::sync::Arc;
-use std::collections::HashSet;
 pub const ILIAS_URL: &str = "https://ilias.studium.kit.edu/";
@@ -72,8 +72,8 @@ async fn real_main(mut opt: Opt) -> Result<()> {
 #[cfg(windows)]
 let _ = colored::control::set_virtual_terminal(true);
-// use UNC paths on Windows
 create_dir(&opt.output).await.context("failed to create output directory")?;
+// use UNC paths on Windows (#6)
 opt.output = fs::canonicalize(opt.output).await.context("failed to canonicalize output directory")?;
 // load .iliasignore file
@@ -107,8 +107,10 @@ async fn real_main(mut opt: Opt) -> Result<()> {
 },
 };
 if ilias.opt.content_tree {
-// need this to get the content tree
-if let Err(e) = ilias.download("ilias.php?baseClass=ilRepositoryGUI&cmd=frameset&set_mode=tree&ref_id=1").await {
+if let Err(e) = ilias
+.download("ilias.php?baseClass=ilRepositoryGUI&cmd=frameset&set_mode=tree&ref_id=1")
+.await
+{
 warning!("could not enable content tree:", e);
 }
 }
@@ -123,8 +125,10 @@ async fn real_main(mut opt: Opt) -> Result<()> {
 PROGRESS_BAR.set_message("initializing..");
 }
+let sync_url = ilias.opt.sync_url.clone().unwrap_or_else(|| {
 // default sync URL: main personal desktop
-let sync_url = ilias.opt.sync_url.clone().unwrap_or_else(|| format!("{}ilias.php?baseClass=ilPersonalDesktopGUI&cmd=jumpToSelectedItems", ILIAS_URL));
+format!("{}ilias.php?baseClass=ilPersonalDesktopGUI&cmd=jumpToSelectedItems", ILIAS_URL)
+});
 let obj = Object::from_url(URL::from_href(&sync_url).context("invalid sync URL")?, String::new(), None).context("invalid sync object")?; // name can be empty for first element
 spawn!(process_gracefully(ilias.clone(), ilias.opt.output.clone(), obj));
@@ -134,13 +138,14 @@ async fn real_main(mut opt: Opt) -> Result<()> {
 error!(e)
 }
 } else {
-break;
+break; // channel is empty => all tasks are completed
 }
 }
-// channel is empty => all tasks are completed
 if ilias.opt.content_tree {
-// restore fast page loading times
-if let Err(e) = ilias.download("ilias.php?baseClass=ilRepositoryGUI&cmd=frameset&set_mode=flat&ref_id=1").await {
+if let Err(e) = ilias
+.download("ilias.php?baseClass=ilRepositoryGUI&cmd=frameset&set_mode=flat&ref_id=1")
+.await
+{
 warning!("could not disable content tree:", e);
 }
 }
@@ -153,11 +158,8 @@
 // https://github.com/rust-lang/rust/issues/53690#issuecomment-418911229
 #[allow(clippy::manual_async_fn)]
-fn process_gracefully(
-ilias: Arc<ILIAS>,
-path: PathBuf,
-obj: Object,
-) -> impl Future<Output = ()> + Send { async move {
+fn process_gracefully(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> impl Future<Output = ()> + Send {
+async move {
 if PROGRESS_BAR_ENABLED.load(Ordering::SeqCst) {
 PROGRESS_BAR.inc_length(1);
 }
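
Aside (not part of the diff): the #[allow(clippy::manual_async_fn)] and the linked issue comment concern spelling the function as `fn ... -> impl Future<Output = ()> + Send` wrapping an `async move` block rather than as an `async fn`; the usual motivation for this shape is to state the Send bound on the returned future explicitly, which matters once that future is handed to a spawned task. A standalone sketch of the pattern (function name and values are made up; assumes a tokio runtime with the macros feature, as used elsewhere in this file):

use std::future::Future;

// Illustration only: the explicit `+ Send` on the return type is the point;
// a plain `async fn` leaves that bound implicit.
fn double_later(n: u32) -> impl Future<Output = u32> + Send {
	async move { n * 2 }
}

#[tokio::main]
async fn main() {
	// tokio::spawn requires the future to be Send + 'static
	let handle = tokio::spawn(double_later(21));
	println!("{}", handle.await.unwrap());
}
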
@@ -167,7 +169,8 @@ fn process_gracefully(
 error!("Syncing {}", path_text; e);
 }
 drop(permit);
-}}
+}
+}
 async fn handle_gracefully(fut: impl Future<Output = Result<()>>) {
 if let Err(e) = fut.await {
@@ -181,11 +184,11 @@ mod selectors {
 use regex::Regex;
 use scraper::Selector;
 // construct CSS selectors once
-pub static a: Lazy<Selector> = Lazy::new(|| Selector::parse("a").unwrap());
+pub static LINKS: Lazy<Selector> = Lazy::new(|| Selector::parse("a").unwrap());
 pub static a_target_blank: Lazy<Selector> = Lazy::new(|| Selector::parse(r#"a[target="_blank"]"#).unwrap());
-pub static img: Lazy<Selector> = Lazy::new(|| Selector::parse("img").unwrap());
-pub static table: Lazy<Selector> = Lazy::new(|| Selector::parse("table").unwrap());
-pub static video_tr: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilTableOuter > div > table > tbody > tr").unwrap());
+pub static IMAGES: Lazy<Selector> = Lazy::new(|| Selector::parse("img").unwrap());
+pub static TABLES: Lazy<Selector> = Lazy::new(|| Selector::parse("table").unwrap());
+pub static VIDEO_ROWS: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilTableOuter > div > table > tbody > tr").unwrap());
 pub static links_in_table: Lazy<Selector> = Lazy::new(|| Selector::parse("tbody tr td a").unwrap());
 pub static th: Lazy<Selector> = Lazy::new(|| Selector::parse("th").unwrap());
 pub static td: Lazy<Selector> = Lazy::new(|| Selector::parse("td").unwrap());
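
Aside (not part of the diff): the renamed statics keep the pattern the comment describes, parsing each CSS selector once in a once_cell::sync::Lazy and reusing the parsed value everywhere. A minimal standalone sketch of the same idea (the HTML snippet is made up; assumes the once_cell and scraper crates already used here):

use once_cell::sync::Lazy;
use scraper::{Html, Selector};

// Parsed exactly once, on first use, and shareable across threads.
static LINKS: Lazy<Selector> = Lazy::new(|| Selector::parse("a").unwrap());

fn main() {
	let html = Html::parse_document(r#"<a href="/a.html">first</a> <a href="/b.html">second</a>"#);
	// `&LINKS` deref-coerces to `&Selector`, the same call shape as in the diff
	for link in html.select(&LINKS) {
		println!("{:?}", link.value().attr("href"));
	}
}
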
@@ -253,7 +256,9 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 };
 if let Some(s) = content.1.as_ref() {
 let path = path.join("course.html");
-write_file_data(&path, &mut s.as_bytes()).await.context("failed to write course page html")?;
+write_file_data(&path, &mut s.as_bytes())
+.await
+.context("failed to write course page html")?;
 }
 for item in content.0 {
 let item = item?;
@@ -266,7 +271,9 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 let content = ilias.get_course_content(&url).await?;
 if let Some(s) = content.1.as_ref() {
 let path = path.join("folder.html");
-write_file_data(&path, &mut s.as_bytes()).await.context("failed to write folder page html")?;
+write_file_data(&path, &mut s.as_bytes())
+.await
+.context("failed to write folder page html")?;
 }
 for item in content.0 {
 let item = item?;
@@ -284,9 +291,7 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 return Ok(());
 }
 let data = ilias.download(&url.url).await?;
-let mut reader = StreamReader::new(data.bytes_stream().map_err(|x| {
-io::Error::new(io::ErrorKind::Other, x)
-}));
+let mut reader = StreamReader::new(data.bytes_stream().map_err(|x| io::Error::new(io::ErrorKind::Other, x)));
 log!(0, "Writing {}", relative_path.to_string_lossy());
 write_file_data(&path, &mut reader).await?;
 },
@@ -301,10 +306,12 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 let data = ilias.download(&list_url).await?;
 let html = data.text().await?;
 let html = Html::parse_fragment(&html);
-html.select(&a)
+html.select(&LINKS)
 .filter_map(|link| link.value().attr("href"))
 .filter(|href| href.contains("trows=800"))
-.map(|x| x.to_string()).next().context("video list link not found")?
+.map(|x| x.to_string())
+.next()
+.context("video list link not found")?
 };
 log!(1, "Rewriting {}", full_url);
 let mut full_url = Url::parse(&format!("{}{}", ILIAS_URL, full_url))?;
@@ -322,7 +329,7 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 let data = ilias.download(full_url.as_str()).await?;
 let html = data.text().await?;
 let html = Html::parse_fragment(&html);
-for row in html.select(&video_tr) {
+for row in html.select(&VIDEO_ROWS) {
 let link = row.select(&a_target_blank).next();
 if link.is_none() {
 if !row.text().any(|x| x == NO_ENTRIES) {
@@ -342,17 +349,10 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 path.push(format!("{}.mp4", file_escape(title)));
 log!(1, "Found video: {}", title);
 let video = Video {
-url: URL::raw(
-link.value()
-.attr("href")
-.context("video link without href")?
-.to_owned(),
-),
+url: URL::raw(link.value().attr("href").context("video link without href")?.to_owned()),
 };
 let ilias = Arc::clone(&ilias);
-spawn!(async {
-process_gracefully(ilias, path, video).await;
-});
+spawn!(process_gracefully(ilias, path, video));
 }
 }
 },
@@ -372,10 +372,7 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 let mut json_capture = XOCT_REGEX.captures_iter(&html);
 let json = &json_capture.next().context("xoct player json not found")?[1];
 log!(2, "{}", json);
-let json = json
-.split(",\n")
-.next()
-.context("invalid xoct player json")?;
+let json = json.split(",\n").next().context("invalid xoct player json")?;
 serde_json::from_str(&json.trim())?
 };
 log!(2, "{}", json);
@@ -386,10 +383,7 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 .context("video src not string")?;
 let meta = fs::metadata(&path).await;
 if !ilias.opt.force && meta.is_ok() && ilias.opt.check_videos {
-let head = ilias
-.head(url)
-.await
-.context("HEAD request failed")?;
+let head = ilias.head(url).await.context("HEAD request failed")?;
 if let Some(len) = head.headers().get("content-length") {
 if meta?.len() != len.to_str()?.parse::<u64>()? {
 warning!(relative_path.to_string_lossy(), "was updated, consider moving the outdated file");
@@ -397,10 +391,7 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 }
 } else {
 let resp = ilias.download(&url).await?;
-let mut reader = StreamReader::new(
-resp.bytes_stream()
-.map_err(|x| io::Error::new(io::ErrorKind::Other, x)),
-);
+let mut reader = StreamReader::new(resp.bytes_stream().map_err(|x| io::Error::new(io::ErrorKind::Other, x)));
 log!(0, "Writing {}", relative_path.to_string_lossy());
 write_file_data(&path, &mut reader).await?;
 }
@@ -415,9 +406,7 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 let html_text = data.await?.text().await?;
 let url = {
 let html = Html::parse_document(&html_text);
-let thread_count_selector = html.select(&a)
-.flat_map(|x| x.value().attr("href"))
-.find(|x| x.contains("trows=800"));
+let thread_count_selector = html.select(&LINKS).flat_map(|x| x.value().attr("href")).find(|x| x.contains("trows=800"));
 if thread_count_selector.is_none() {
 if let Some(cell) = html.select(&td).next() {
 if cell.text().any(|x| x == NO_ENTRIES) {
@@ -448,19 +437,12 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 );
 continue;
 }
-let link = cells[1]
-.select(&a)
-.next()
-.context("thread link not found")?;
+let link = cells[1].select(&LINKS).next().context("thread link not found")?;
 let object = Object::from_link(link, link)?;
 let mut path = path.clone();
 let name = format!(
 "{}_{}",
-object
-.url()
-.thr_pk
-.as_ref()
-.context("thr_pk not found for thread")?,
+object.url().thr_pk.as_ref().context("thr_pk not found for thread")?,
 link.text().collect::<String>().trim()
 );
 path.push(file_escape(&name));
@@ -504,15 +486,9 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 .context("post title not found")?
 .text()
 .collect::<String>();
-let author = post
-.select(&span_small)
-.next()
-.context("post author not found")?;
+let author = post.select(&span_small).next().context("post author not found")?;
 let author = author.text().collect::<String>();
-let author = author
-.trim()
-.split('|')
-.collect::<Vec<_>>();
+let author = author.trim().split('|').collect::<Vec<_>>();
 let author = if author.len() == 2 {
 author[0] // pseudonymous forum
 } else if author.len() == 3 {
@@ -523,36 +499,26 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 }
 } else {
 return Err(anyhow!("author data in unknown format"));
-}.trim();
-let container = post
-.select(&post_container)
-.next()
-.context("post container not found")?;
-let link = container.select(&a).next().context("post link not found")?;
-let id = link
-.value()
-.attr("id")
-.context("no id in thread link")?
-.to_owned();
+}
+.trim();
+let container = post.select(&post_container).next().context("post container not found")?;
+let link = container.select(&LINKS).next().context("post link not found")?;
+let id = link.value().attr("id").context("no id in thread link")?.to_owned();
 let name = format!("{}_{}_{}.html", id, author, title.trim());
 let data = container.inner_html();
 let path = path.join(file_escape(&name));
 let relative_path = relative_path.join(file_escape(&name));
 spawn!(handle_gracefully(async move {
 log!(0, "Writing {}", relative_path.display());
-write_file_data(&path, &mut data.as_bytes())
-.await
-.context("failed to write forum post")
+write_file_data(&path, &mut data.as_bytes()).await.context("failed to write forum post")
 }));
-let images = container
-.select(&img)
-.map(|x| x.value().attr("src").map(|x| x.to_owned()));
+let images = container.select(&IMAGES).map(|x| x.value().attr("src").map(|x| x.to_owned()));
 for image in images {
 let image = image.context("no src on image")?;
 all_images.push((id.clone(), image));
 }
 if let Some(container) = container.select(&post_attachments).next() {
-for attachment in container.select(&a) {
+for attachment in container.select(&LINKS) {
 attachments.push((
 id.clone(),
 attachment.text().collect::<String>(),
@@ -562,16 +528,14 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 }
 }
 // pagination
-if let Some(pages) = html.select(&table).next() {
+if let Some(pages) = html.select(&TABLES).next() {
 if let Some(last) = pages.select(&links_in_table).last() {
 let text = last.text().collect::<String>();
 if text.trim() == ">>" {
 // not last page yet
 let ilias = Arc::clone(&ilias);
 let next_page = Thread {
-url: URL::from_href(
-last.value().attr("href").context("page link not found")?,
-)?,
+url: URL::from_href(last.value().attr("href").context("page link not found")?)?,
 };
 spawn!(process_gracefully(ilias, path.clone(), next_page));
 }
@@ -622,7 +586,7 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 let html = ilias.get_html(&url.url).await?;
 let mut filenames = HashSet::new();
 for row in html.select(&form_group) {
-let link = row.select(&a).next();
+let link = row.select(&LINKS).next();
 if link.is_none() {
 continue;
 }
@@ -634,10 +598,7 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 let href = href.unwrap();
 let url = URL::from_href(href)?;
 let cmd = url.cmd.as_deref().unwrap_or("");
-if cmd != "downloadFile"
-&& cmd != "downloadGlobalFeedbackFile"
-&& cmd != "downloadFeedbackFile"
-{
+if cmd != "downloadFile" && cmd != "downloadGlobalFeedbackFile" && cmd != "downloadFeedbackFile" {
 continue;
 }
 // link is definitely just a download link to the exercise or the solution
@@ -660,10 +621,10 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 let mut i = 1;
 while filenames.contains(&unique_filename) {
 i += 1;
-if name != "" {
-unique_filename = format!("{}{}.{}", name, i, extension);
-} else {
-unique_filename = format!("{}{}", extension, i);
+if name.is_empty() {
+unique_filename = format!("{}{}", extension, i);
+} else {
+unique_filename = format!("{}{}.{}", name, i, extension);
 }
 }
 filenames.insert(unique_filename.clone());
@@ -691,17 +652,9 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 let urls = {
 let html = ilias.get_html(url).await?;
-html.select(&a)
-.filter_map(|x| {
-x.value()
-.attr("href")
-.map(|y| (y, x.text().collect::<String>()))
-})
-.map(|(x, y)| {
-URL::from_href(x)
-.map(|z| (z, y.trim().to_owned()))
-.context("parsing weblink")
-})
+html.select(&LINKS)
+.filter_map(|x| x.value().attr("href").map(|y| (y, x.text().collect::<String>())))
+.map(|(x, y)| URL::from_href(x).map(|z| (z, y.trim().to_owned())).context("parsing weblink"))
 .collect::<Result<Vec<_>>>()
 }?;

(next changed file)

@@ -10,7 +10,9 @@ use crate::Result;
 /// Write all data to the specified path. Will overwrite previous file data.
 pub async fn write_file_data<R: ?Sized>(path: impl AsRef<Path>, data: &mut R) -> Result<()>
-where R: AsyncRead + Unpin {
+where
+R: AsyncRead + Unpin,
+{
 let file = AsyncFile::create(path.as_ref()).await.context("failed to create file")?;
 let mut file = BufWriter::new(file);
 tokio::io::copy(data, &mut file).await.context("failed to write to file")?;
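
Aside (not part of the diff): the reformatted `where` clause leaves write_file_data generic over any unpinned AsyncRead, which is why the call sites above can pass either a StreamReader over a response body or a plain `&mut s.as_bytes()`. A standalone sketch that mirrors the function and shows the in-memory call shape (file name is made up; assumes the tokio and anyhow crates this project already uses):

use std::path::Path;

use anyhow::{Context, Result};
use tokio::fs::File as AsyncFile;
use tokio::io::{AsyncRead, BufWriter};

// Mirror of the function in the hunk above, for illustration only.
async fn write_file_data<R: ?Sized>(path: impl AsRef<Path>, data: &mut R) -> Result<()>
where
	R: AsyncRead + Unpin,
{
	let file = AsyncFile::create(path.as_ref()).await.context("failed to create file")?;
	let mut file = BufWriter::new(file);
	tokio::io::copy(data, &mut file).await.context("failed to write to file")?;
	Ok(())
}

#[tokio::main]
async fn main() -> Result<()> {
	// An in-memory byte slice implements AsyncRead, matching the
	// `&mut s.as_bytes()` call sites in the diff.
	write_file_data("example.html", &mut "<p>hello</p>".as_bytes()).await
}
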