mirror of
https://github.com/FliegendeWurst/KIT-ILIAS-downloader.git
synced 2024-08-28 04:04:18 +00:00
Consistent code formatting
This commit is contained in:
parent
5676476765
commit
5fb2faabfd
3
rustfmt.toml
Normal file
3
rustfmt.toml
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
hard_tabs = true
|
||||||
|
match_block_trailing_comma = true
|
||||||
|
max_width = 145
|
@ -1,6 +1,7 @@
|
|||||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
use std::{path::PathBuf, sync::atomic::{AtomicBool, AtomicUsize}};
|
use std::path::PathBuf;
|
||||||
|
use std::sync::atomic::{AtomicBool, AtomicUsize};
|
||||||
|
|
||||||
#[cfg(feature = "keyring-auth")]
|
#[cfg(feature = "keyring-auth")]
|
||||||
use anyhow::anyhow;
|
use anyhow::anyhow;
|
||||||
@ -73,7 +74,11 @@ pub struct Opt {
|
|||||||
|
|
||||||
/// Requests per minute
|
/// Requests per minute
|
||||||
#[structopt(long, default_value = "8")]
|
#[structopt(long, default_value = "8")]
|
||||||
pub rate: usize
|
pub rate: usize,
|
||||||
|
|
||||||
|
/// Attempt to re-use session cookies
|
||||||
|
#[structopt(long)]
|
||||||
|
pub keep_session: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub static LOG_LEVEL: AtomicUsize = AtomicUsize::new(0);
|
pub static LOG_LEVEL: AtomicUsize = AtomicUsize::new(0);
|
||||||
|
106
src/ilias.rs
106
src/ilias.rs
@ -9,7 +9,7 @@ use reqwest::{Client, IntoUrl, Proxy, Url};
|
|||||||
use scraper::{ElementRef, Html, Selector};
|
use scraper::{ElementRef, Html, Selector};
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
|
||||||
use crate::{ILIAS_URL, cli::Opt, get_request_ticket, selectors::*};
|
use crate::{cli::Opt, get_request_ticket, selectors::*, ILIAS_URL};
|
||||||
|
|
||||||
pub struct ILIAS {
|
pub struct ILIAS {
|
||||||
pub opt: Opt,
|
pub opt: Opt,
|
||||||
@ -23,8 +23,9 @@ pub struct ILIAS {
|
|||||||
/// Returns true if the error is caused by:
|
/// Returns true if the error is caused by:
|
||||||
/// "http2 error: protocol error: not a result of an error"
|
/// "http2 error: protocol error: not a result of an error"
|
||||||
fn error_is_http2(error: &reqwest::Error) -> bool {
|
fn error_is_http2(error: &reqwest::Error) -> bool {
|
||||||
error.source() // hyper::Error
|
error
|
||||||
.map(|x| x.source()) // -> h2::Error
|
.source() // hyper::Error
|
||||||
|
.map(|x| x.source()) // h2::Error
|
||||||
.flatten()
|
.flatten()
|
||||||
.map(|x| x.downcast_ref::<h2::Error>())
|
.map(|x| x.downcast_ref::<h2::Error>())
|
||||||
.flatten()
|
.flatten()
|
||||||
@ -48,9 +49,16 @@ impl ILIAS {
|
|||||||
let client = builder
|
let client = builder
|
||||||
// timeout is infinite by default
|
// timeout is infinite by default
|
||||||
.build()?;
|
.build()?;
|
||||||
let this = ILIAS { opt, ignore, user, pass, client };
|
let this = ILIAS {
|
||||||
|
opt,
|
||||||
|
ignore,
|
||||||
|
user,
|
||||||
|
pass,
|
||||||
|
client,
|
||||||
|
};
|
||||||
info!("Logging into ILIAS using KIT account..");
|
info!("Logging into ILIAS using KIT account..");
|
||||||
let session_establishment = this.client
|
let session_establishment = this
|
||||||
|
.client
|
||||||
.post("https://ilias.studium.kit.edu/Shibboleth.sso/Login")
|
.post("https://ilias.studium.kit.edu/Shibboleth.sso/Login")
|
||||||
.form(&json!({
|
.form(&json!({
|
||||||
"sendLogin": "1",
|
"sendLogin": "1",
|
||||||
@ -58,29 +66,33 @@ impl ILIAS {
|
|||||||
"target": "/shib_login.php?target=",
|
"target": "/shib_login.php?target=",
|
||||||
"home_organization_selection": "Mit KIT-Account anmelden"
|
"home_organization_selection": "Mit KIT-Account anmelden"
|
||||||
}))
|
}))
|
||||||
.send().await?;
|
.send()
|
||||||
|
.await?;
|
||||||
let url = session_establishment.url().clone();
|
let url = session_establishment.url().clone();
|
||||||
let text = session_establishment.text().await?;
|
let text = session_establishment.text().await?;
|
||||||
let dom_sso = Html::parse_document(text.as_str());
|
let dom_sso = Html::parse_document(text.as_str());
|
||||||
let csrf_token = dom_sso
|
let csrf_token = dom_sso
|
||||||
.select(&Selector::parse(r#"input[name="csrf_token"]"#).unwrap())
|
.select(&Selector::parse(r#"input[name="csrf_token"]"#).unwrap())
|
||||||
.next().context("no csrf token")?;
|
.next()
|
||||||
|
.context("no CSRF token found")?
|
||||||
|
.value().attr("value").context("no CSRF token value")?;
|
||||||
info!("Logging into Shibboleth..");
|
info!("Logging into Shibboleth..");
|
||||||
let login_response = this.client
|
let login_response = this
|
||||||
|
.client
|
||||||
.post(url)
|
.post(url)
|
||||||
.form(&json!({
|
.form(&json!({
|
||||||
"j_username": &this.user,
|
"j_username": &this.user,
|
||||||
"j_password": &this.pass,
|
"j_password": &this.pass,
|
||||||
"_eventId_proceed": "",
|
"_eventId_proceed": "",
|
||||||
"csrf_token": csrf_token.value().attr("value").context("no csrf token")?,
|
"csrf_token": csrf_token,
|
||||||
}))
|
}))
|
||||||
.send().await?
|
.send()
|
||||||
.text().await?;
|
.await?
|
||||||
|
.text()
|
||||||
|
.await?;
|
||||||
let dom = Html::parse_document(&login_response);
|
let dom = Html::parse_document(&login_response);
|
||||||
let saml = Selector::parse(r#"input[name="SAMLResponse"]"#).unwrap();
|
let saml = Selector::parse(r#"input[name="SAMLResponse"]"#).unwrap();
|
||||||
let saml = dom
|
let saml = dom.select(&saml).next().context("no SAML response, incorrect password?")?;
|
||||||
.select(&saml)
|
|
||||||
.next().context("no SAML response, incorrect password?")?;
|
|
||||||
let relay_state = Selector::parse(r#"input[name="RelayState"]"#).unwrap();
|
let relay_state = Selector::parse(r#"input[name="RelayState"]"#).unwrap();
|
||||||
let relay_state = dom.select(&relay_state).next().context("no relay state")?;
|
let relay_state = dom.select(&relay_state).next().context("no relay state")?;
|
||||||
info!("Logging into ILIAS..");
|
info!("Logging into ILIAS..");
|
||||||
@ -90,7 +102,8 @@ impl ILIAS {
|
|||||||
"SAMLResponse": saml.value().attr("value").context("no SAML value")?,
|
"SAMLResponse": saml.value().attr("value").context("no SAML value")?,
|
||||||
"RelayState": relay_state.value().attr("value").context("no RelayState value")?
|
"RelayState": relay_state.value().attr("value").context("no RelayState value")?
|
||||||
}))
|
}))
|
||||||
.send().await?;
|
.send()
|
||||||
|
.await?;
|
||||||
success!("Logged in!");
|
success!("Logged in!");
|
||||||
Ok(this)
|
Ok(this)
|
||||||
}
|
}
|
||||||
@ -111,9 +124,9 @@ impl ILIAS {
|
|||||||
Ok(x) => return Ok(x),
|
Ok(x) => return Ok(x),
|
||||||
Err(e) if attempt <= 3 && error_is_http2(&e) => {
|
Err(e) if attempt <= 3 && error_is_http2(&e) => {
|
||||||
warning!(1; "encountered HTTP/2 NO_ERROR, retrying download..");
|
warning!(1; "encountered HTTP/2 NO_ERROR, retrying download..");
|
||||||
continue
|
continue;
|
||||||
},
|
},
|
||||||
Err(e) => return Err(e.into())
|
Err(e) => return Err(e.into()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
unreachable!()
|
unreachable!()
|
||||||
@ -128,9 +141,9 @@ impl ILIAS {
|
|||||||
Ok(x) => return Ok(x),
|
Ok(x) => return Ok(x),
|
||||||
Err(e) if attempt <= 3 && error_is_http2(&e) => {
|
Err(e) if attempt <= 3 && error_is_http2(&e) => {
|
||||||
warning!(1; "encountered HTTP/2 NO_ERROR, retrying HEAD request..");
|
warning!(1; "encountered HTTP/2 NO_ERROR, retrying HEAD request..");
|
||||||
continue
|
continue;
|
||||||
},
|
},
|
||||||
Err(e) => return Err(e)
|
Err(e) => return Err(e),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
unreachable!()
|
unreachable!()
|
||||||
@ -159,10 +172,8 @@ impl ILIAS {
|
|||||||
pub fn get_items(html: &Html) -> Vec<Result<Object>> {
|
pub fn get_items(html: &Html) -> Vec<Result<Object>> {
|
||||||
html.select(&container_items)
|
html.select(&container_items)
|
||||||
.flat_map(|item| {
|
.flat_map(|item| {
|
||||||
item.select(&container_item_title)
|
item.select(&container_item_title).next().map(|link| Object::from_link(item, link))
|
||||||
.next()
|
// items without links are ignored
|
||||||
.map(|link| Object::from_link(item, link))
|
|
||||||
// items without links are ignored
|
|
||||||
})
|
})
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
@ -172,11 +183,14 @@ impl ILIAS {
|
|||||||
let html = self.get_html(&url.url).await?;
|
let html = self.get_html(&url.url).await?;
|
||||||
|
|
||||||
let main_text = if let Some(el) = html.select(&il_content_container).next() {
|
let main_text = if let Some(el) = html.select(&il_content_container).next() {
|
||||||
|
if !el
|
||||||
if !el.children().flat_map(|x| x.value().as_element()).next()
|
.children()
|
||||||
.map(|x| x.attr("class").unwrap_or_default()
|
.flat_map(|x| x.value().as_element())
|
||||||
.contains("ilContainerBlock")).unwrap_or(false)
|
.next()
|
||||||
&& el.inner_html().len() > 40 {
|
.map(|x| x.attr("class").unwrap_or_default().contains("ilContainerBlock"))
|
||||||
|
.unwrap_or(false)
|
||||||
|
&& el.inner_html().len() > 40
|
||||||
|
{
|
||||||
// ^ minimum length of useful content?
|
// ^ minimum length of useful content?
|
||||||
Some(el.inner_html())
|
Some(el.inner_html())
|
||||||
} else {
|
} else {
|
||||||
@ -198,7 +212,7 @@ impl ILIAS {
|
|||||||
);
|
);
|
||||||
let html = self.get_html_fragment(&url).await?;
|
let html = self.get_html_fragment(&url).await?;
|
||||||
let mut items = Vec::new();
|
let mut items = Vec::new();
|
||||||
for link in html.select(&a) {
|
for link in html.select(&LINKS) {
|
||||||
if link.value().attr("href").is_some() {
|
if link.value().attr("href").is_some() {
|
||||||
items.push(Object::from_link(link, link)?);
|
items.push(Object::from_link(link, link)?);
|
||||||
} // else: disabled course
|
} // else: disabled course
|
||||||
@ -243,7 +257,7 @@ impl Object {
|
|||||||
| Generic { name, .. } => &name,
|
| Generic { name, .. } => &name,
|
||||||
Thread { url } => &url.thr_pk.as_ref().unwrap(),
|
Thread { url } => &url.thr_pk.as_ref().unwrap(),
|
||||||
Video { url } => &url.url,
|
Video { url } => &url.url,
|
||||||
PersonalDesktop { url } => url.cmd.as_ref().unwrap()
|
PersonalDesktop { .. } => panic!("name of personal desktop requested (this should never happen)"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -286,25 +300,18 @@ impl Object {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_dir(&self) -> bool {
|
pub fn is_dir(&self) -> bool {
|
||||||
matches!(self,
|
matches!(
|
||||||
|
self,
|
||||||
Course { .. }
|
Course { .. }
|
||||||
| Folder { .. }
|
| Folder { .. } | PersonalDesktop { .. }
|
||||||
| PersonalDesktop { .. }
|
| Forum { .. } | Thread { .. }
|
||||||
| Forum { .. }
|
| Wiki { .. } | ExerciseHandler { .. }
|
||||||
| Thread { .. }
|
| PluginDispatch { .. }
|
||||||
| Wiki { .. }
|
|
||||||
| ExerciseHandler { .. }
|
|
||||||
| PluginDispatch { .. }
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn from_link(item: ElementRef, link: ElementRef) -> Result<Self> {
|
pub fn from_link(item: ElementRef, link: ElementRef) -> Result<Self> {
|
||||||
let name = link
|
let name = link.text().collect::<String>().replace('/', "-").trim().to_owned();
|
||||||
.text()
|
|
||||||
.collect::<String>()
|
|
||||||
.replace('/', "-")
|
|
||||||
.trim()
|
|
||||||
.to_owned();
|
|
||||||
let url = URL::from_href(link.value().attr("href").context("link missing href")?)?;
|
let url = URL::from_href(link.value().attr("href").context("link missing href")?)?;
|
||||||
Object::from_url(url, name, Some(item))
|
Object::from_url(url, name, Some(item))
|
||||||
}
|
}
|
||||||
@ -314,10 +321,7 @@ impl Object {
|
|||||||
return Ok(Thread { url });
|
return Ok(Thread { url });
|
||||||
}
|
}
|
||||||
|
|
||||||
if url
|
if url.url.starts_with("https://ilias.studium.kit.edu/goto.php") {
|
||||||
.url
|
|
||||||
.starts_with("https://ilias.studium.kit.edu/goto.php")
|
|
||||||
{
|
|
||||||
let target = url.target.as_deref().unwrap_or("NONE");
|
let target = url.target.as_deref().unwrap_or("NONE");
|
||||||
if target.starts_with("wiki_") {
|
if target.starts_with("wiki_") {
|
||||||
return Ok(Wiki {
|
return Ok(Wiki {
|
||||||
@ -356,11 +360,7 @@ impl Object {
|
|||||||
} else {
|
} else {
|
||||||
let mut item_props = item.context("can't construct file object without HTML object")?.select(&item_prop);
|
let mut item_props = item.context("can't construct file object without HTML object")?.select(&item_prop);
|
||||||
let ext = item_props.next().context("cannot find file extension")?;
|
let ext = item_props.next().context("cannot find file extension")?;
|
||||||
let version = item_props
|
let version = item_props.nth(1).context("cannot find 3rd file metadata")?.text().collect::<String>();
|
||||||
.nth(1)
|
|
||||||
.context("cannot find 3rd file metadata")?
|
|
||||||
.text()
|
|
||||||
.collect::<String>();
|
|
||||||
let version = version.trim();
|
let version = version.trim();
|
||||||
if let Some(v) = version.strip_prefix("Version: ") {
|
if let Some(v) = version.strip_prefix("Version: ") {
|
||||||
name += "_v";
|
name += "_v";
|
||||||
|
193
src/main.rs
193
src/main.rs
@ -1,6 +1,6 @@
|
|||||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
#![allow(clippy::comparison_to_empty, clippy::upper_case_acronyms)]
|
#![allow(clippy::upper_case_acronyms)]
|
||||||
|
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
use colored::Colorize;
|
use colored::Colorize;
|
||||||
@ -13,17 +13,17 @@ use indicatif::{ProgressDrawTarget, ProgressStyle};
|
|||||||
use once_cell::sync::{Lazy, OnceCell};
|
use once_cell::sync::{Lazy, OnceCell};
|
||||||
use scraper::Html;
|
use scraper::Html;
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
use tokio::{fs, sync::Semaphore, time};
|
|
||||||
use tokio::task::{self, JoinHandle};
|
use tokio::task::{self, JoinHandle};
|
||||||
|
use tokio::{fs, sync::Semaphore, time};
|
||||||
use tokio_util::io::StreamReader;
|
use tokio_util::io::StreamReader;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
|
use std::collections::HashSet;
|
||||||
use std::future::Future;
|
use std::future::Future;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::sync::atomic::Ordering;
|
use std::sync::atomic::Ordering;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::collections::HashSet;
|
|
||||||
|
|
||||||
pub const ILIAS_URL: &str = "https://ilias.studium.kit.edu/";
|
pub const ILIAS_URL: &str = "https://ilias.studium.kit.edu/";
|
||||||
|
|
||||||
@ -72,8 +72,8 @@ async fn real_main(mut opt: Opt) -> Result<()> {
|
|||||||
#[cfg(windows)]
|
#[cfg(windows)]
|
||||||
let _ = colored::control::set_virtual_terminal(true);
|
let _ = colored::control::set_virtual_terminal(true);
|
||||||
|
|
||||||
// use UNC paths on Windows
|
|
||||||
create_dir(&opt.output).await.context("failed to create output directory")?;
|
create_dir(&opt.output).await.context("failed to create output directory")?;
|
||||||
|
// use UNC paths on Windows (#6)
|
||||||
opt.output = fs::canonicalize(opt.output).await.context("failed to canonicalize output directory")?;
|
opt.output = fs::canonicalize(opt.output).await.context("failed to canonicalize output directory")?;
|
||||||
|
|
||||||
// load .iliasignore file
|
// load .iliasignore file
|
||||||
@ -107,8 +107,10 @@ async fn real_main(mut opt: Opt) -> Result<()> {
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
if ilias.opt.content_tree {
|
if ilias.opt.content_tree {
|
||||||
// need this to get the content tree
|
if let Err(e) = ilias
|
||||||
if let Err(e) = ilias.download("ilias.php?baseClass=ilRepositoryGUI&cmd=frameset&set_mode=tree&ref_id=1").await {
|
.download("ilias.php?baseClass=ilRepositoryGUI&cmd=frameset&set_mode=tree&ref_id=1")
|
||||||
|
.await
|
||||||
|
{
|
||||||
warning!("could not enable content tree:", e);
|
warning!("could not enable content tree:", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -123,8 +125,10 @@ async fn real_main(mut opt: Opt) -> Result<()> {
|
|||||||
PROGRESS_BAR.set_message("initializing..");
|
PROGRESS_BAR.set_message("initializing..");
|
||||||
}
|
}
|
||||||
|
|
||||||
// default sync URL: main personal desktop
|
let sync_url = ilias.opt.sync_url.clone().unwrap_or_else(|| {
|
||||||
let sync_url = ilias.opt.sync_url.clone().unwrap_or_else(|| format!("{}ilias.php?baseClass=ilPersonalDesktopGUI&cmd=jumpToSelectedItems", ILIAS_URL));
|
// default sync URL: main personal desktop
|
||||||
|
format!("{}ilias.php?baseClass=ilPersonalDesktopGUI&cmd=jumpToSelectedItems", ILIAS_URL)
|
||||||
|
});
|
||||||
let obj = Object::from_url(URL::from_href(&sync_url).context("invalid sync URL")?, String::new(), None).context("invalid sync object")?; // name can be empty for first element
|
let obj = Object::from_url(URL::from_href(&sync_url).context("invalid sync URL")?, String::new(), None).context("invalid sync object")?; // name can be empty for first element
|
||||||
spawn!(process_gracefully(ilias.clone(), ilias.opt.output.clone(), obj));
|
spawn!(process_gracefully(ilias.clone(), ilias.opt.output.clone(), obj));
|
||||||
|
|
||||||
@ -134,13 +138,14 @@ async fn real_main(mut opt: Opt) -> Result<()> {
|
|||||||
error!(e)
|
error!(e)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
break;
|
break; // channel is empty => all tasks are completed
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// channel is empty => all tasks are completed
|
|
||||||
if ilias.opt.content_tree {
|
if ilias.opt.content_tree {
|
||||||
// restore fast page loading times
|
if let Err(e) = ilias
|
||||||
if let Err(e) = ilias.download("ilias.php?baseClass=ilRepositoryGUI&cmd=frameset&set_mode=flat&ref_id=1").await {
|
.download("ilias.php?baseClass=ilRepositoryGUI&cmd=frameset&set_mode=flat&ref_id=1")
|
||||||
|
.await
|
||||||
|
{
|
||||||
warning!("could not disable content tree:", e);
|
warning!("could not disable content tree:", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -153,21 +158,19 @@ async fn real_main(mut opt: Opt) -> Result<()> {
|
|||||||
|
|
||||||
// https://github.com/rust-lang/rust/issues/53690#issuecomment-418911229
|
// https://github.com/rust-lang/rust/issues/53690#issuecomment-418911229
|
||||||
#[allow(clippy::manual_async_fn)]
|
#[allow(clippy::manual_async_fn)]
|
||||||
fn process_gracefully(
|
fn process_gracefully(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> impl Future<Output = ()> + Send {
|
||||||
ilias: Arc<ILIAS>,
|
async move {
|
||||||
path: PathBuf,
|
if PROGRESS_BAR_ENABLED.load(Ordering::SeqCst) {
|
||||||
obj: Object,
|
PROGRESS_BAR.inc_length(1);
|
||||||
) -> impl Future<Output = ()> + Send { async move {
|
}
|
||||||
if PROGRESS_BAR_ENABLED.load(Ordering::SeqCst) {
|
let permit = TASKS_RUNNING.acquire().await.unwrap();
|
||||||
PROGRESS_BAR.inc_length(1);
|
let path_text = path.to_string_lossy().into_owned();
|
||||||
|
if let Err(e) = process(ilias, path, obj).await.context("failed to process URL") {
|
||||||
|
error!("Syncing {}", path_text; e);
|
||||||
|
}
|
||||||
|
drop(permit);
|
||||||
}
|
}
|
||||||
let permit = TASKS_RUNNING.acquire().await.unwrap();
|
}
|
||||||
let path_text = path.to_string_lossy().into_owned();
|
|
||||||
if let Err(e) = process(ilias, path, obj).await.context("failed to process URL") {
|
|
||||||
error!("Syncing {}", path_text; e);
|
|
||||||
}
|
|
||||||
drop(permit);
|
|
||||||
}}
|
|
||||||
|
|
||||||
async fn handle_gracefully(fut: impl Future<Output = Result<()>>) {
|
async fn handle_gracefully(fut: impl Future<Output = Result<()>>) {
|
||||||
if let Err(e) = fut.await {
|
if let Err(e) = fut.await {
|
||||||
@ -181,11 +184,11 @@ mod selectors {
|
|||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use scraper::Selector;
|
use scraper::Selector;
|
||||||
// construct CSS selectors once
|
// construct CSS selectors once
|
||||||
pub static a: Lazy<Selector> = Lazy::new(|| Selector::parse("a").unwrap());
|
pub static LINKS: Lazy<Selector> = Lazy::new(|| Selector::parse("a").unwrap());
|
||||||
pub static a_target_blank: Lazy<Selector> = Lazy::new(|| Selector::parse(r#"a[target="_blank"]"#).unwrap());
|
pub static a_target_blank: Lazy<Selector> = Lazy::new(|| Selector::parse(r#"a[target="_blank"]"#).unwrap());
|
||||||
pub static img: Lazy<Selector> = Lazy::new(|| Selector::parse("img").unwrap());
|
pub static IMAGES: Lazy<Selector> = Lazy::new(|| Selector::parse("img").unwrap());
|
||||||
pub static table: Lazy<Selector> = Lazy::new(|| Selector::parse("table").unwrap());
|
pub static TABLES: Lazy<Selector> = Lazy::new(|| Selector::parse("table").unwrap());
|
||||||
pub static video_tr: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilTableOuter > div > table > tbody > tr").unwrap());
|
pub static VIDEO_ROWS: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilTableOuter > div > table > tbody > tr").unwrap());
|
||||||
pub static links_in_table: Lazy<Selector> = Lazy::new(|| Selector::parse("tbody tr td a").unwrap());
|
pub static links_in_table: Lazy<Selector> = Lazy::new(|| Selector::parse("tbody tr td a").unwrap());
|
||||||
pub static th: Lazy<Selector> = Lazy::new(|| Selector::parse("th").unwrap());
|
pub static th: Lazy<Selector> = Lazy::new(|| Selector::parse("th").unwrap());
|
||||||
pub static td: Lazy<Selector> = Lazy::new(|| Selector::parse("td").unwrap());
|
pub static td: Lazy<Selector> = Lazy::new(|| Selector::parse("td").unwrap());
|
||||||
@ -253,7 +256,9 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
};
|
};
|
||||||
if let Some(s) = content.1.as_ref() {
|
if let Some(s) = content.1.as_ref() {
|
||||||
let path = path.join("course.html");
|
let path = path.join("course.html");
|
||||||
write_file_data(&path, &mut s.as_bytes()).await.context("failed to write course page html")?;
|
write_file_data(&path, &mut s.as_bytes())
|
||||||
|
.await
|
||||||
|
.context("failed to write course page html")?;
|
||||||
}
|
}
|
||||||
for item in content.0 {
|
for item in content.0 {
|
||||||
let item = item?;
|
let item = item?;
|
||||||
@ -266,7 +271,9 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
let content = ilias.get_course_content(&url).await?;
|
let content = ilias.get_course_content(&url).await?;
|
||||||
if let Some(s) = content.1.as_ref() {
|
if let Some(s) = content.1.as_ref() {
|
||||||
let path = path.join("folder.html");
|
let path = path.join("folder.html");
|
||||||
write_file_data(&path, &mut s.as_bytes()).await.context("failed to write folder page html")?;
|
write_file_data(&path, &mut s.as_bytes())
|
||||||
|
.await
|
||||||
|
.context("failed to write folder page html")?;
|
||||||
}
|
}
|
||||||
for item in content.0 {
|
for item in content.0 {
|
||||||
let item = item?;
|
let item = item?;
|
||||||
@ -284,9 +291,7 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
let data = ilias.download(&url.url).await?;
|
let data = ilias.download(&url.url).await?;
|
||||||
let mut reader = StreamReader::new(data.bytes_stream().map_err(|x| {
|
let mut reader = StreamReader::new(data.bytes_stream().map_err(|x| io::Error::new(io::ErrorKind::Other, x)));
|
||||||
io::Error::new(io::ErrorKind::Other, x)
|
|
||||||
}));
|
|
||||||
log!(0, "Writing {}", relative_path.to_string_lossy());
|
log!(0, "Writing {}", relative_path.to_string_lossy());
|
||||||
write_file_data(&path, &mut reader).await?;
|
write_file_data(&path, &mut reader).await?;
|
||||||
},
|
},
|
||||||
@ -301,10 +306,12 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
let data = ilias.download(&list_url).await?;
|
let data = ilias.download(&list_url).await?;
|
||||||
let html = data.text().await?;
|
let html = data.text().await?;
|
||||||
let html = Html::parse_fragment(&html);
|
let html = Html::parse_fragment(&html);
|
||||||
html.select(&a)
|
html.select(&LINKS)
|
||||||
.filter_map(|link| link.value().attr("href"))
|
.filter_map(|link| link.value().attr("href"))
|
||||||
.filter(|href| href.contains("trows=800"))
|
.filter(|href| href.contains("trows=800"))
|
||||||
.map(|x| x.to_string()).next().context("video list link not found")?
|
.map(|x| x.to_string())
|
||||||
|
.next()
|
||||||
|
.context("video list link not found")?
|
||||||
};
|
};
|
||||||
log!(1, "Rewriting {}", full_url);
|
log!(1, "Rewriting {}", full_url);
|
||||||
let mut full_url = Url::parse(&format!("{}{}", ILIAS_URL, full_url))?;
|
let mut full_url = Url::parse(&format!("{}{}", ILIAS_URL, full_url))?;
|
||||||
@ -322,7 +329,7 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
let data = ilias.download(full_url.as_str()).await?;
|
let data = ilias.download(full_url.as_str()).await?;
|
||||||
let html = data.text().await?;
|
let html = data.text().await?;
|
||||||
let html = Html::parse_fragment(&html);
|
let html = Html::parse_fragment(&html);
|
||||||
for row in html.select(&video_tr) {
|
for row in html.select(&VIDEO_ROWS) {
|
||||||
let link = row.select(&a_target_blank).next();
|
let link = row.select(&a_target_blank).next();
|
||||||
if link.is_none() {
|
if link.is_none() {
|
||||||
if !row.text().any(|x| x == NO_ENTRIES) {
|
if !row.text().any(|x| x == NO_ENTRIES) {
|
||||||
@ -342,17 +349,10 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
path.push(format!("{}.mp4", file_escape(title)));
|
path.push(format!("{}.mp4", file_escape(title)));
|
||||||
log!(1, "Found video: {}", title);
|
log!(1, "Found video: {}", title);
|
||||||
let video = Video {
|
let video = Video {
|
||||||
url: URL::raw(
|
url: URL::raw(link.value().attr("href").context("video link without href")?.to_owned()),
|
||||||
link.value()
|
|
||||||
.attr("href")
|
|
||||||
.context("video link without href")?
|
|
||||||
.to_owned(),
|
|
||||||
),
|
|
||||||
};
|
};
|
||||||
let ilias = Arc::clone(&ilias);
|
let ilias = Arc::clone(&ilias);
|
||||||
spawn!(async {
|
spawn!(process_gracefully(ilias, path, video));
|
||||||
process_gracefully(ilias, path, video).await;
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -372,10 +372,7 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
let mut json_capture = XOCT_REGEX.captures_iter(&html);
|
let mut json_capture = XOCT_REGEX.captures_iter(&html);
|
||||||
let json = &json_capture.next().context("xoct player json not found")?[1];
|
let json = &json_capture.next().context("xoct player json not found")?[1];
|
||||||
log!(2, "{}", json);
|
log!(2, "{}", json);
|
||||||
let json = json
|
let json = json.split(",\n").next().context("invalid xoct player json")?;
|
||||||
.split(",\n")
|
|
||||||
.next()
|
|
||||||
.context("invalid xoct player json")?;
|
|
||||||
serde_json::from_str(&json.trim())?
|
serde_json::from_str(&json.trim())?
|
||||||
};
|
};
|
||||||
log!(2, "{}", json);
|
log!(2, "{}", json);
|
||||||
@ -386,10 +383,7 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
.context("video src not string")?;
|
.context("video src not string")?;
|
||||||
let meta = fs::metadata(&path).await;
|
let meta = fs::metadata(&path).await;
|
||||||
if !ilias.opt.force && meta.is_ok() && ilias.opt.check_videos {
|
if !ilias.opt.force && meta.is_ok() && ilias.opt.check_videos {
|
||||||
let head = ilias
|
let head = ilias.head(url).await.context("HEAD request failed")?;
|
||||||
.head(url)
|
|
||||||
.await
|
|
||||||
.context("HEAD request failed")?;
|
|
||||||
if let Some(len) = head.headers().get("content-length") {
|
if let Some(len) = head.headers().get("content-length") {
|
||||||
if meta?.len() != len.to_str()?.parse::<u64>()? {
|
if meta?.len() != len.to_str()?.parse::<u64>()? {
|
||||||
warning!(relative_path.to_string_lossy(), "was updated, consider moving the outdated file");
|
warning!(relative_path.to_string_lossy(), "was updated, consider moving the outdated file");
|
||||||
@ -397,10 +391,7 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let resp = ilias.download(&url).await?;
|
let resp = ilias.download(&url).await?;
|
||||||
let mut reader = StreamReader::new(
|
let mut reader = StreamReader::new(resp.bytes_stream().map_err(|x| io::Error::new(io::ErrorKind::Other, x)));
|
||||||
resp.bytes_stream()
|
|
||||||
.map_err(|x| io::Error::new(io::ErrorKind::Other, x)),
|
|
||||||
);
|
|
||||||
log!(0, "Writing {}", relative_path.to_string_lossy());
|
log!(0, "Writing {}", relative_path.to_string_lossy());
|
||||||
write_file_data(&path, &mut reader).await?;
|
write_file_data(&path, &mut reader).await?;
|
||||||
}
|
}
|
||||||
@ -415,9 +406,7 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
let html_text = data.await?.text().await?;
|
let html_text = data.await?.text().await?;
|
||||||
let url = {
|
let url = {
|
||||||
let html = Html::parse_document(&html_text);
|
let html = Html::parse_document(&html_text);
|
||||||
let thread_count_selector = html.select(&a)
|
let thread_count_selector = html.select(&LINKS).flat_map(|x| x.value().attr("href")).find(|x| x.contains("trows=800"));
|
||||||
.flat_map(|x| x.value().attr("href"))
|
|
||||||
.find(|x| x.contains("trows=800"));
|
|
||||||
if thread_count_selector.is_none() {
|
if thread_count_selector.is_none() {
|
||||||
if let Some(cell) = html.select(&td).next() {
|
if let Some(cell) = html.select(&td).next() {
|
||||||
if cell.text().any(|x| x == NO_ENTRIES) {
|
if cell.text().any(|x| x == NO_ENTRIES) {
|
||||||
@ -448,19 +437,12 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
);
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let link = cells[1]
|
let link = cells[1].select(&LINKS).next().context("thread link not found")?;
|
||||||
.select(&a)
|
|
||||||
.next()
|
|
||||||
.context("thread link not found")?;
|
|
||||||
let object = Object::from_link(link, link)?;
|
let object = Object::from_link(link, link)?;
|
||||||
let mut path = path.clone();
|
let mut path = path.clone();
|
||||||
let name = format!(
|
let name = format!(
|
||||||
"{}_{}",
|
"{}_{}",
|
||||||
object
|
object.url().thr_pk.as_ref().context("thr_pk not found for thread")?,
|
||||||
.url()
|
|
||||||
.thr_pk
|
|
||||||
.as_ref()
|
|
||||||
.context("thr_pk not found for thread")?,
|
|
||||||
link.text().collect::<String>().trim()
|
link.text().collect::<String>().trim()
|
||||||
);
|
);
|
||||||
path.push(file_escape(&name));
|
path.push(file_escape(&name));
|
||||||
@ -504,15 +486,9 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
.context("post title not found")?
|
.context("post title not found")?
|
||||||
.text()
|
.text()
|
||||||
.collect::<String>();
|
.collect::<String>();
|
||||||
let author = post
|
let author = post.select(&span_small).next().context("post author not found")?;
|
||||||
.select(&span_small)
|
|
||||||
.next()
|
|
||||||
.context("post author not found")?;
|
|
||||||
let author = author.text().collect::<String>();
|
let author = author.text().collect::<String>();
|
||||||
let author = author
|
let author = author.trim().split('|').collect::<Vec<_>>();
|
||||||
.trim()
|
|
||||||
.split('|')
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
let author = if author.len() == 2 {
|
let author = if author.len() == 2 {
|
||||||
author[0] // pseudonymous forum
|
author[0] // pseudonymous forum
|
||||||
} else if author.len() == 3 {
|
} else if author.len() == 3 {
|
||||||
@ -523,36 +499,26 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return Err(anyhow!("author data in unknown format"));
|
return Err(anyhow!("author data in unknown format"));
|
||||||
}.trim();
|
}
|
||||||
let container = post
|
.trim();
|
||||||
.select(&post_container)
|
let container = post.select(&post_container).next().context("post container not found")?;
|
||||||
.next()
|
let link = container.select(&LINKS).next().context("post link not found")?;
|
||||||
.context("post container not found")?;
|
let id = link.value().attr("id").context("no id in thread link")?.to_owned();
|
||||||
let link = container.select(&a).next().context("post link not found")?;
|
|
||||||
let id = link
|
|
||||||
.value()
|
|
||||||
.attr("id")
|
|
||||||
.context("no id in thread link")?
|
|
||||||
.to_owned();
|
|
||||||
let name = format!("{}_{}_{}.html", id, author, title.trim());
|
let name = format!("{}_{}_{}.html", id, author, title.trim());
|
||||||
let data = container.inner_html();
|
let data = container.inner_html();
|
||||||
let path = path.join(file_escape(&name));
|
let path = path.join(file_escape(&name));
|
||||||
let relative_path = relative_path.join(file_escape(&name));
|
let relative_path = relative_path.join(file_escape(&name));
|
||||||
spawn!(handle_gracefully(async move {
|
spawn!(handle_gracefully(async move {
|
||||||
log!(0, "Writing {}", relative_path.display());
|
log!(0, "Writing {}", relative_path.display());
|
||||||
write_file_data(&path, &mut data.as_bytes())
|
write_file_data(&path, &mut data.as_bytes()).await.context("failed to write forum post")
|
||||||
.await
|
|
||||||
.context("failed to write forum post")
|
|
||||||
}));
|
}));
|
||||||
let images = container
|
let images = container.select(&IMAGES).map(|x| x.value().attr("src").map(|x| x.to_owned()));
|
||||||
.select(&img)
|
|
||||||
.map(|x| x.value().attr("src").map(|x| x.to_owned()));
|
|
||||||
for image in images {
|
for image in images {
|
||||||
let image = image.context("no src on image")?;
|
let image = image.context("no src on image")?;
|
||||||
all_images.push((id.clone(), image));
|
all_images.push((id.clone(), image));
|
||||||
}
|
}
|
||||||
if let Some(container) = container.select(&post_attachments).next() {
|
if let Some(container) = container.select(&post_attachments).next() {
|
||||||
for attachment in container.select(&a) {
|
for attachment in container.select(&LINKS) {
|
||||||
attachments.push((
|
attachments.push((
|
||||||
id.clone(),
|
id.clone(),
|
||||||
attachment.text().collect::<String>(),
|
attachment.text().collect::<String>(),
|
||||||
@ -562,16 +528,14 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// pagination
|
// pagination
|
||||||
if let Some(pages) = html.select(&table).next() {
|
if let Some(pages) = html.select(&TABLES).next() {
|
||||||
if let Some(last) = pages.select(&links_in_table).last() {
|
if let Some(last) = pages.select(&links_in_table).last() {
|
||||||
let text = last.text().collect::<String>();
|
let text = last.text().collect::<String>();
|
||||||
if text.trim() == ">>" {
|
if text.trim() == ">>" {
|
||||||
// not last page yet
|
// not last page yet
|
||||||
let ilias = Arc::clone(&ilias);
|
let ilias = Arc::clone(&ilias);
|
||||||
let next_page = Thread {
|
let next_page = Thread {
|
||||||
url: URL::from_href(
|
url: URL::from_href(last.value().attr("href").context("page link not found")?)?,
|
||||||
last.value().attr("href").context("page link not found")?,
|
|
||||||
)?,
|
|
||||||
};
|
};
|
||||||
spawn!(process_gracefully(ilias, path.clone(), next_page));
|
spawn!(process_gracefully(ilias, path.clone(), next_page));
|
||||||
}
|
}
|
||||||
@ -622,7 +586,7 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
let html = ilias.get_html(&url.url).await?;
|
let html = ilias.get_html(&url.url).await?;
|
||||||
let mut filenames = HashSet::new();
|
let mut filenames = HashSet::new();
|
||||||
for row in html.select(&form_group) {
|
for row in html.select(&form_group) {
|
||||||
let link = row.select(&a).next();
|
let link = row.select(&LINKS).next();
|
||||||
if link.is_none() {
|
if link.is_none() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -634,10 +598,7 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
let href = href.unwrap();
|
let href = href.unwrap();
|
||||||
let url = URL::from_href(href)?;
|
let url = URL::from_href(href)?;
|
||||||
let cmd = url.cmd.as_deref().unwrap_or("");
|
let cmd = url.cmd.as_deref().unwrap_or("");
|
||||||
if cmd != "downloadFile"
|
if cmd != "downloadFile" && cmd != "downloadGlobalFeedbackFile" && cmd != "downloadFeedbackFile" {
|
||||||
&& cmd != "downloadGlobalFeedbackFile"
|
|
||||||
&& cmd != "downloadFeedbackFile"
|
|
||||||
{
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// link is definitely just a download link to the exercise or the solution
|
// link is definitely just a download link to the exercise or the solution
|
||||||
@ -660,10 +621,10 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
let mut i = 1;
|
let mut i = 1;
|
||||||
while filenames.contains(&unique_filename) {
|
while filenames.contains(&unique_filename) {
|
||||||
i += 1;
|
i += 1;
|
||||||
if name != "" {
|
if name.is_empty() {
|
||||||
unique_filename = format!("{}{}.{}", name, i, extension);
|
|
||||||
} else {
|
|
||||||
unique_filename = format!("{}{}", extension, i);
|
unique_filename = format!("{}{}", extension, i);
|
||||||
|
} else {
|
||||||
|
unique_filename = format!("{}{}.{}", name, i, extension);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
filenames.insert(unique_filename.clone());
|
filenames.insert(unique_filename.clone());
|
||||||
@ -691,17 +652,9 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
|
|
||||||
let urls = {
|
let urls = {
|
||||||
let html = ilias.get_html(url).await?;
|
let html = ilias.get_html(url).await?;
|
||||||
html.select(&a)
|
html.select(&LINKS)
|
||||||
.filter_map(|x| {
|
.filter_map(|x| x.value().attr("href").map(|y| (y, x.text().collect::<String>())))
|
||||||
x.value()
|
.map(|(x, y)| URL::from_href(x).map(|z| (z, y.trim().to_owned())).context("parsing weblink"))
|
||||||
.attr("href")
|
|
||||||
.map(|y| (y, x.text().collect::<String>()))
|
|
||||||
})
|
|
||||||
.map(|(x, y)| {
|
|
||||||
URL::from_href(x)
|
|
||||||
.map(|z| (z, y.trim().to_owned()))
|
|
||||||
.context("parsing weblink")
|
|
||||||
})
|
|
||||||
.collect::<Result<Vec<_>>>()
|
.collect::<Result<Vec<_>>>()
|
||||||
}?;
|
}?;
|
||||||
|
|
||||||
|
@ -9,8 +9,10 @@ use std::path::Path;
|
|||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
/// Write all data to the specified path. Will overwrite previous file data.
|
/// Write all data to the specified path. Will overwrite previous file data.
|
||||||
pub async fn write_file_data<R: ?Sized>(path: impl AsRef<Path>, data: &mut R) -> Result<()>
|
pub async fn write_file_data<R: ?Sized>(path: impl AsRef<Path>, data: &mut R) -> Result<()>
|
||||||
where R: AsyncRead + Unpin {
|
where
|
||||||
|
R: AsyncRead + Unpin,
|
||||||
|
{
|
||||||
let file = AsyncFile::create(path.as_ref()).await.context("failed to create file")?;
|
let file = AsyncFile::create(path.as_ref()).await.context("failed to create file")?;
|
||||||
let mut file = BufWriter::new(file);
|
let mut file = BufWriter::new(file);
|
||||||
tokio::io::copy(data, &mut file).await.context("failed to write to file")?;
|
tokio::io::copy(data, &mut file).await.context("failed to write to file")?;
|
||||||
|
Loading…
Reference in New Issue
Block a user