Mirror of https://github.com/FliegendeWurst/KIT-ILIAS-downloader.git (synced 2024-08-28 04:04:18 +00:00)

Commit e7354e0ad1 (parent 0ebe5bc3cf): Split downloading logic into modules
Cargo.lock (generated; 6 lines changed)
@@ -8,6 +8,7 @@ version = "0.2.21"
 dependencies = [
  "anyhow",
  "atty",
+ "bytes",
  "cfg-if",
  "colored",
  "cookie_store 0.14.1",
@@ -355,11 +356,10 @@ dependencies = [

 [[package]]
 name = "crossbeam-utils"
-version = "0.8.4"
+version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4feb231f0d4d6af81aed15928e58ecf5816aa62a2393e2c82f46973e92a9a278"
+checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
 dependencies = [
- "autocfg",
  "cfg-if",
  "lazy_static",
 ]
Cargo.toml

@@ -32,6 +32,7 @@ atty = "0.2.14"
 h2 = "0.3.3"
 cookie_store = "0.14.0"
 reqwest_cookie_store = "0.1.5"
+bytes = "1.0.1"

 [features]
 default = []
rustfmt.toml

@@ -1,3 +1,3 @@
 hard_tabs = true
 match_block_trailing_comma = true
-max_width = 145
+max_width = 120
src/cli.rs (24 lines changed)
@@ -6,8 +6,6 @@ use std::sync::atomic::{AtomicBool, AtomicUsize};
 #[cfg(feature = "keyring-auth")]
 use anyhow::anyhow;
 use anyhow::{Context, Result};
-#[cfg(feature = "keyring-auth")]
-use colored::Colorize as _;
 use indicatif::ProgressBar;
 use once_cell::sync::Lazy;
 use structopt::StructOpt;
@@ -87,6 +85,8 @@ pub static PROGRESS_BAR: Lazy<ProgressBar> = Lazy::new(|| ProgressBar::new(0));

 macro_rules! log {
 	($lvl:expr, $($t:expr),+) => {{
+		#[allow(unused_imports)]
+		use colored::Colorize as _;
 		#[allow(unused_comparisons)] // 0 <= 0
 		if $lvl <= crate::cli::LOG_LEVEL.load(std::sync::atomic::Ordering::SeqCst) {
 			if crate::cli::PROGRESS_BAR_ENABLED.load(std::sync::atomic::Ordering::SeqCst) {
@@ -111,21 +111,21 @@ macro_rules! success {
 }

 macro_rules! warning {
-	($e:expr) => {
+	($e:expr) => {{
 		log!(0, "Warning: {}", format!("{:?}", $e).bright_yellow());
-	};
+	}};
-	($msg:expr, $e:expr) => {
+	($msg:expr, $e:expr) => {{
 		log!(0, "Warning: {}", format!("{} {:?}", $msg, $e).bright_yellow());
-	};
+	}};
-	($msg1:expr, $msg2:expr, $e:expr) => {
+	($msg1:expr, $msg2:expr, $e:expr) => {{
 		log!(0, "Warning: {}", format!("{} {} {:?}", $msg1, $msg2, $e).bright_yellow());
-	};
+	}};
-	(format => $($e:expr),+) => {
+	(format => $($e:expr),+) => {{
 		log!(0, "Warning: {}", format!($($e),+).bright_yellow());
-	};
+	}};
-	($lvl:expr; $($e:expr),+) => {
+	($lvl:expr; $($e:expr),+) => {{
 		log!($lvl, "Warning: {}", format!($($e),+).bright_yellow());
-	}
+	}};
 }

 macro_rules! error {
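Note on the change above: with single braces, the braces are consumed as the macro's own delimiters, so `warning!(e)` expanded to bare statements and could not be used in expression position (for example as a match arm). Doubled braces make every expansion a single block expression. A minimal standalone sketch of the difference, using a hypothetical `shout!` macro rather than the repo's `warning!`:

// Minimal sketch, not from this repo: the inner braces of `{{ ... }}`
// survive expansion, turning the whole arm into one block expression.
macro_rules! shout {
	($msg:expr) => {{
		let text = format!("Warning: {}", $msg); // binding stays local to the block
		eprintln!("{}", text);
	}};
}

fn main() {
	let level = 1;
	match level {
		0 => (),
		// With `=> { ... };` in the macro definition this arm would fail to
		// compile, because the expansion would be statements, not an expression.
		_ => shout!("something happened"),
	}
}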
src/ilias.rs (75 lines changed)
@@ -3,15 +3,32 @@
 use std::{error::Error as _, io::Write, sync::Arc};

 use anyhow::{anyhow, Context, Result};
-use colored::Colorize;
 use cookie_store::CookieStore;
 use ignore::gitignore::Gitignore;
+use once_cell::sync::Lazy;
 use reqwest::{Client, IntoUrl, Proxy, Url};
 use reqwest_cookie_store::CookieStoreMutex;
 use scraper::{ElementRef, Html, Selector};
 use serde_json::json;

-use crate::{cli::Opt, get_request_ticket, selectors::*, ILIAS_URL};
+use crate::{cli::Opt, queue, ILIAS_URL};

+pub mod course;
+pub mod exercise;
+pub mod file;
+pub mod folder;
+pub mod forum;
+pub mod plugin_dispatch;
+pub mod thread;
+pub mod video;
+pub mod weblink;
+
+static LINKS: Lazy<Selector> = Lazy::new(|| Selector::parse("a").unwrap());
+static ALERT_DANGER: Lazy<Selector> = Lazy::new(|| Selector::parse("div.alert-danger").unwrap());
+static IL_CONTENT_CONTAINER: Lazy<Selector> = Lazy::new(|| Selector::parse("#il_center_col").unwrap());
+static ITEM_PROP: Lazy<Selector> = Lazy::new(|| Selector::parse("span.il_ItemProperty").unwrap());
+static CONTAINER_ITEMS: Lazy<Selector> = Lazy::new(|| Selector::parse("div.il_ContainerListItem").unwrap());
+static CONTAINER_ITEM_TITLE: Lazy<Selector> = Lazy::new(|| Selector::parse("a.il_ContainerItemTitle").unwrap());
+
 pub struct ILIAS {
 	pub opt: Opt,
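Note on the change above: the shared lowercase `selectors` module (removed from src/main.rs further down in this diff) is replaced by per-module `once_cell::sync::Lazy` statics, so each file declares only the selectors it uses while each selector is still parsed exactly once, on first access. A minimal standalone sketch of the pattern, assuming only the `once_cell` and `scraper` crates:

use once_cell::sync::Lazy;
use scraper::{Html, Selector};

// Parsed once on first use, then shared for the rest of the program.
static LINKS: Lazy<Selector> = Lazy::new(|| Selector::parse("a").unwrap());

fn main() {
	let html = Html::parse_fragment(r#"<a href="/a">first</a><a href="/b">second</a>"#);
	// `&LINKS` deref-coerces from `&Lazy<Selector>` to `&Selector`.
	for link in html.select(&LINKS) {
		println!("{:?}", link.value().attr("href"));
	}
}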
@@ -38,10 +55,9 @@ fn error_is_http2(error: &reqwest::Error) -> bool {
 impl ILIAS {
 	// TODO: de-duplicate the logic below
 	pub async fn with_session(opt: Opt, session: Arc<CookieStoreMutex>, ignore: Gitignore) -> Result<Self> {
-		let mut builder =
-			Client::builder()
-				.cookie_provider(Arc::clone(&session))
-				.user_agent(concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")));
+		let mut builder = Client::builder()
+			.cookie_provider(Arc::clone(&session))
+			.user_agent(concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")));
 		if let Some(proxy) = opt.proxy.as_ref() {
 			let proxy = Proxy::all(proxy)?;
 			builder = builder.proxy(proxy);
@@ -62,11 +78,9 @@ impl ILIAS {
 		let cookie_store = CookieStore::default();
 		let cookie_store = reqwest_cookie_store::CookieStoreMutex::new(cookie_store);
 		let cookie_store = std::sync::Arc::new(cookie_store);
-		let mut builder = Client::builder().cookie_provider(Arc::clone(&cookie_store)).user_agent(concat!(
-			env!("CARGO_PKG_NAME"),
-			"/",
-			env!("CARGO_PKG_VERSION")
-		));
+		let mut builder = Client::builder()
+			.cookie_provider(Arc::clone(&cookie_store))
+			.user_agent(concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")));
 		if let Some(proxy) = opt.proxy.as_ref() {
 			let proxy = Proxy::all(proxy)?;
 			builder = builder.proxy(proxy);
@@ -118,7 +132,10 @@ impl ILIAS {
 			.await?;
 		let dom = Html::parse_document(&login_response);
 		let saml = Selector::parse(r#"input[name="SAMLResponse"]"#).unwrap();
-		let saml = dom.select(&saml).next().context("no SAML response, incorrect password?")?;
+		let saml = dom
+			.select(&saml)
+			.next()
+			.context("no SAML response, incorrect password?")?;
 		let relay_state = Selector::parse(r#"input[name="RelayState"]"#).unwrap();
 		let relay_state = dom.select(&relay_state).next().context("no relay state")?;
 		info!("Logging into ILIAS..");
@@ -136,7 +153,9 @@ impl ILIAS {

 	pub async fn save_session(&self) -> Result<()> {
 		let session_path = self.opt.output.join(".iliassession");
-		let mut writer = std::fs::File::create(session_path).map(std::io::BufWriter::new).unwrap();
+		let mut writer = std::fs::File::create(session_path)
+			.map(std::io::BufWriter::new)
+			.unwrap();
 		let store = self.cookies.lock().map_err(|x| anyhow!("{}", x))?;
 		// save all cookies, including session cookies
 		for cookie in store.iter_unexpired().map(serde_json::to_string) {
@@ -147,7 +166,7 @@ impl ILIAS {
 	}

 	pub async fn download(&self, url: &str) -> Result<reqwest::Response> {
-		get_request_ticket().await;
+		queue::get_request_ticket().await;
 		log!(2, "Downloading {}", url);
 		let url = if url.starts_with("http://") || url.starts_with("https://") {
 			url.to_owned()
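Note on the change above: `queue::get_request_ticket()` takes over the rate limiting previously done in src/main.rs (see the removed `REQUEST_TICKETS` code further down in this diff): a semaphore starts with zero permits, a background task adds one permit every 60/rate seconds, and each HTTP request consumes one permit for good. A standalone sketch of that mechanism, assuming the new queue module keeps the same semaphore design; the names mirror the call sites, but the bodies are reconstructed, not copied from src/queue.rs:

use once_cell::sync::Lazy;
use tokio::{sync::Semaphore, time};

// Starts empty; permits are added over time by the refill task.
static REQUEST_TICKETS: Lazy<Semaphore> = Lazy::new(|| Semaphore::new(0));

/// Waits until a ticket is available, then consumes it permanently
/// (`forget` prevents the permit from being returned on drop).
pub async fn get_request_ticket() {
	REQUEST_TICKETS.acquire().await.unwrap().forget();
}

/// Refill task: `rate` requests per minute.
pub fn set_download_rate(rate: usize) {
	tokio::spawn(async move {
		let mut interval = time::interval(time::Duration::from_secs_f64(60.0 / rate as f64));
		loop {
			interval.tick().await;
			REQUEST_TICKETS.add_permits(1);
		}
	});
}

#[tokio::main]
async fn main() {
	set_download_rate(120); // at most ~2 requests per second
	for i in 0..5 {
		get_request_ticket().await; // each iteration waits for its ticket
		println!("request {} may start now", i);
	}
}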
@@ -171,7 +190,7 @@ impl ILIAS {
 	}

 	pub async fn head<U: IntoUrl>(&self, url: U) -> Result<reqwest::Response, reqwest::Error> {
-		get_request_ticket().await;
+		queue::get_request_ticket().await;
 		let url = url.into_url()?;
 		for attempt in 1..10 {
 			let result = self.client.head(url.clone()).send().await;
@@ -199,7 +218,7 @@ impl ILIAS {
 		}
 		let text = self.download(url).await?.text().await?;
 		let html = Html::parse_document(&text);
-		if html.select(&alert_danger).next().is_some() {
+		if html.select(&ALERT_DANGER).next().is_some() {
 			Err(anyhow!("ILIAS error"))
 		} else {
 			Ok(html)
@@ -209,7 +228,7 @@ impl ILIAS {
 	pub async fn get_html_fragment(&self, url: &str) -> Result<Html> {
 		let text = self.download(url).await?.text().await?;
 		let html = Html::parse_fragment(&text);
-		if html.select(&alert_danger).next().is_some() {
+		if html.select(&ALERT_DANGER).next().is_some() {
 			Err(anyhow!("ILIAS error"))
 		} else {
 			Ok(html)
@@ -217,9 +236,11 @@ impl ILIAS {
 	}

 	pub fn get_items(html: &Html) -> Vec<Result<Object>> {
-		html.select(&container_items)
+		html.select(&CONTAINER_ITEMS)
 			.flat_map(|item| {
-				item.select(&container_item_title).next().map(|link| Object::from_link(item, link))
+				item.select(&CONTAINER_ITEM_TITLE)
+					.next()
+					.map(|link| Object::from_link(item, link))
 				// items without links are ignored
 			})
 			.collect()
@@ -229,7 +250,7 @@ impl ILIAS {
 	pub async fn get_course_content(&self, url: &URL) -> Result<(Vec<Result<Object>>, Option<String>)> {
 		let html = self.get_html(&url.url).await?;

-		let main_text = if let Some(el) = html.select(&il_content_container).next() {
+		let main_text = if let Some(el) = html.select(&IL_CONTENT_CONTAINER).next() {
 			if !el
 				.children()
 				.flat_map(|x| x.value().as_element())
@@ -268,6 +289,10 @@ impl ILIAS {
 	}
 }

+trait IliasObject {
+	fn download(ilias: Arc<ILIAS>) -> Result<()>;
+}
+
 #[derive(Debug)]
 pub enum Object {
 	Course { name: String, url: URL },
@@ -405,9 +430,15 @@ impl Object {
 			// download page containing metadata
 			return Ok(Generic { name, url });
 		} else {
-			let mut item_props = item.context("can't construct file object without HTML object")?.select(&item_prop);
+			let mut item_props = item
+				.context("can't construct file object without HTML object")?
+				.select(&ITEM_PROP);
 			let ext = item_props.next().context("cannot find file extension")?;
-			let version = item_props.nth(1).context("cannot find 3rd file metadata")?.text().collect::<String>();
+			let version = item_props
+				.nth(1)
+				.context("cannot find 3rd file metadata")?
+				.text()
+				.collect::<String>();
 			let version = version.trim();
 			if let Some(v) = version.strip_prefix("Version: ") {
 				name += "_v";
src/ilias/course.rs (new file, 50 lines)
use std::{path::PathBuf, sync::Arc};

use anyhow::{Context, Result};
use once_cell::sync::Lazy;
use regex::Regex;

use crate::{
	process_gracefully,
	queue::spawn,
	util::{file_escape, write_file_data},
};

use super::{ILIAS, URL};

static CMD_NODE_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#"cmdNode=uf:\w\w"#).unwrap());

pub async fn download(path: PathBuf, ilias: Arc<ILIAS>, url: &URL, name: &str) -> Result<()> {
	let content = if ilias.opt.content_tree {
		let html = ilias.download(&url.url).await?.text().await?;
		let cmd_node = CMD_NODE_REGEX.find(&html).context("can't find cmdNode")?.as_str()[8..].to_owned();
		let content_tree = ilias.get_course_content_tree(&url.ref_id, &cmd_node).await;
		match content_tree {
			Ok(tree) => (tree.into_iter().map(Result::Ok).collect(), None),
			Err(e) => {
				// some folders are hidden on the course page and can only be found via the RSS feed / recent activity / content tree sidebar
				// TODO: this is probably never the case for folders?
				if html.contains(r#"input[name="cmd[join]""#) {
					return Ok(()); // ignore groups we are not in
				}
				warning!(name, "falling back to incomplete course content extractor!", e);
				ilias.get_course_content(&url).await? // TODO: perhaps don't download almost the same content 3x
			},
		}
	} else {
		ilias.get_course_content(&url).await?
	};
	if let Some(s) = content.1.as_ref() {
		let path = path.join("course.html");
		write_file_data(&path, &mut s.as_bytes())
			.await
			.context("failed to write course page html")?;
	}
	for item in content.0 {
		let item = item?;
		let path = path.join(file_escape(item.name()));
		let ilias = Arc::clone(&ilias);
		spawn(process_gracefully(ilias, path, item));
	}
	Ok(())
}
src/ilias/exercise.rs (new file, 66 lines)
use std::{collections::HashSet, path::Path, sync::Arc};

use anyhow::{Context, Result};
use once_cell::sync::Lazy;
use scraper::Selector;

use crate::{process_gracefully, queue::spawn, util::file_escape};

use super::{Object, ILIAS, URL};

static LINKS: Lazy<Selector> = Lazy::new(|| Selector::parse("a").unwrap());
static FORM_GROUP: Lazy<Selector> = Lazy::new(|| Selector::parse(".form-group").unwrap());
static FORM_NAME: Lazy<Selector> = Lazy::new(|| Selector::parse(".il_InfoScreenProperty").unwrap());

pub async fn download(path: &Path, ilias: Arc<ILIAS>, url: &URL) -> Result<()> {
	let html = ilias.get_html(&url.url).await?;
	let mut filenames = HashSet::new();
	for row in html.select(&FORM_GROUP) {
		let link = row.select(&LINKS).next();
		if link.is_none() {
			continue;
		}
		let link = link.unwrap();
		let href = link.value().attr("href");
		if href.is_none() {
			continue;
		}
		let href = href.unwrap();
		let url = URL::from_href(href)?;
		let cmd = url.cmd.as_deref().unwrap_or("");
		if cmd != "downloadFile" && cmd != "downloadGlobalFeedbackFile" && cmd != "downloadFeedbackFile" {
			continue;
		}
		// link is definitely just a download link to the exercise or the solution
		let name = row
			.select(&FORM_NAME)
			.next()
			.context("link without file name")?
			.text()
			.collect::<String>()
			.trim()
			.to_owned();
		let item = Object::File { url, name };
		let mut path = path.to_owned();
		// handle files with the same name
		let filename = file_escape(item.name());
		let mut parts = filename.rsplitn(2, '.');
		let extension = parts.next().unwrap_or(&filename);
		let name = parts.next().unwrap_or("");
		let mut unique_filename = filename.clone();
		let mut i = 1;
		while filenames.contains(&unique_filename) {
			i += 1;
			if name.is_empty() {
				unique_filename = format!("{}{}", extension, i);
			} else {
				unique_filename = format!("{}{}.{}", name, i, extension);
			}
		}
		filenames.insert(unique_filename.clone());
		path.push(unique_filename);
		let ilias = Arc::clone(&ilias);
		spawn(process_gracefully(ilias, path, item));
	}
	Ok(())
}
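Note on the duplicate-name handling above: `rsplitn(2, '.')` iterates from the right, so the first item is the extension and the second, if present, is the remaining stem; a name without any dot ends up entirely in the extension slot with an empty stem, which is what the `name.is_empty()` branch covers. A small standalone illustration of just that splitting step:

fn stem_and_ext(filename: &str) -> (&str, &str) {
	// rsplitn(2, '.') yields at most two pieces, rightmost first:
	// the extension, then everything before the last dot.
	let mut parts = filename.rsplitn(2, '.');
	let extension = parts.next().unwrap_or(filename);
	let name = parts.next().unwrap_or("");
	(name, extension)
}

fn main() {
	assert_eq!(stem_and_ext("sheet01.solution.pdf"), ("sheet01.solution", "pdf"));
	// No dot at all: everything lands in the "extension" slot, the stem is empty.
	assert_eq!(stem_and_ext("README"), ("", "README"));
	println!("ok");
}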
src/ilias/file.rs (new file, 22 lines)
use std::{path::Path, sync::Arc};

use anyhow::Result;
use tokio::fs;

use crate::util::write_stream_to_file;

use super::{ILIAS, URL};

pub async fn download(path: &Path, relative_path: &Path, ilias: Arc<ILIAS>, url: &URL) -> Result<()> {
	if ilias.opt.skip_files {
		return Ok(());
	}
	if !ilias.opt.force && fs::metadata(&path).await.is_ok() {
		log!(2, "Skipping download, file exists already");
		return Ok(());
	}
	let data = ilias.download(&url.url).await?;
	log!(0, "Writing {}", relative_path.to_string_lossy());
	write_stream_to_file(&path, data.bytes_stream()).await?;
	Ok(())
}
src/ilias/folder.rs (new file, 28 lines)
use std::{path::Path, sync::Arc};

use anyhow::{Context, Result};

use crate::{
	process_gracefully,
	queue::spawn,
	util::{file_escape, write_file_data},
};

use super::{ILIAS, URL};

pub async fn download(path: &Path, ilias: Arc<ILIAS>, url: &URL) -> Result<()> {
	let content = ilias.get_course_content(&url).await?;
	if let Some(s) = content.1.as_ref() {
		let path = path.join("folder.html");
		write_file_data(&path, &mut s.as_bytes())
			.await
			.context("failed to write folder page html")?;
	}
	for item in content.0 {
		let item = item?;
		let path = path.join(file_escape(item.name()));
		let ilias = Arc::clone(&ilias);
		spawn(process_gracefully(ilias, path, item));
	}
	Ok(())
}
src/ilias/forum.rs (new file, 100 lines)
use std::{path::Path, sync::Arc};

use anyhow::{Context, Result};
use once_cell::sync::Lazy;
use scraper::{Html, Selector};

use crate::{ilias::Object, process_gracefully, queue::spawn, util::file_escape};

use super::{ILIAS, URL};

static LINKS: Lazy<Selector> = Lazy::new(|| Selector::parse("a").unwrap());
static TABLE_HEADER: Lazy<Selector> = Lazy::new(|| Selector::parse("th").unwrap());
static TABLE_ROW: Lazy<Selector> = Lazy::new(|| Selector::parse("tr").unwrap());
static TABLE_CELLS: Lazy<Selector> = Lazy::new(|| Selector::parse("td").unwrap());

static FORUM_PAGES: Lazy<Selector> =
	Lazy::new(|| Selector::parse("div.ilTableNav > table > tbody > tr > td > a").unwrap());

const NO_ENTRIES: &str = "Keine Einträge";

pub async fn download(path: &Path, ilias: Arc<ILIAS>, url: &URL) -> Result<()> {
	if !ilias.opt.forum {
		return Ok(());
	}
	let url = &url.url;
	let html = {
		let data = ilias.download(url);
		let html_text = data.await?.text().await?;
		let url = {
			let html = Html::parse_document(&html_text);
			let thread_count_selector = html
				.select(&LINKS)
				.flat_map(|x| x.value().attr("href"))
				.find(|x| x.contains("trows=800"));
			if thread_count_selector.is_none() {
				if let Some(cell) = html.select(&TABLE_CELLS).next() {
					if cell.text().any(|x| x == NO_ENTRIES) {
						return Ok(()); // empty forum
					}
				}
			}
			thread_count_selector
				.context("can't find forum thread count selector (empty forum?)")?
				.to_owned()
		};
		let data = ilias.download(&url);
		let html = data.await?.text().await?;
		Html::parse_document(&html)
	};
	for row in html.select(&TABLE_ROW) {
		if row.value().attr("class") == Some("hidden-print") {
			continue; // thread count
		}
		if row.select(&TABLE_HEADER).next().is_some() {
			continue;
		}
		let cells = row.select(&TABLE_CELLS).collect::<Vec<_>>();
		if cells.len() != 6 {
			warning!(format =>
				"Warning: {}{} {} {}",
				"unusual table row (", cells.len(), "cells) in", url.to_string()
			);
			continue;
		}
		let link = cells[1].select(&LINKS).next().context("thread link not found")?;
		let object = Object::from_link(link, link)?;
		let mut path = path.to_owned();
		let name = format!(
			"{}_{}",
			object.url().thr_pk.as_ref().context("thr_pk not found for thread")?,
			link.text().collect::<String>().trim()
		);
		path.push(file_escape(&name));
		// FIXME: this heuristic no longer works after downloading attachments
		// TODO: set modification date?
		let saved_posts = {
			match std::fs::read_dir(&path) {
				// TODO: make this async
				Ok(stream) => stream.count(),
				Err(_) => 0,
			}
		};
		let available_posts = cells[3]
			.text()
			.next()
			.unwrap_or_default()
			.trim()
			.parse::<usize>()
			.context("parsing post count failed")?;
		if available_posts <= saved_posts && !ilias.opt.force {
			continue;
		}
		let ilias = Arc::clone(&ilias);
		spawn(process_gracefully(ilias, path, object));
	}
	if html.select(&FORUM_PAGES).count() > 0 {
		log!(0, "Ignoring older threads in {:?}..", path);
	}
	Ok(())
}
src/ilias/plugin_dispatch.rs (new file, 87 lines)
use std::{path::Path, sync::Arc};

use anyhow::{Context, Result};
use once_cell::sync::Lazy;
use reqwest::Url;
use scraper::{Html, Selector};

use crate::{ilias::Object, process_gracefully, queue::spawn, util::file_escape, ILIAS_URL};

use super::{ILIAS, URL};

static LINKS: Lazy<Selector> = Lazy::new(|| Selector::parse("a").unwrap());
static A_TARGET_BLANK: Lazy<Selector> = Lazy::new(|| Selector::parse(r#"a[target="_blank"]"#).unwrap());
static VIDEO_ROWS: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilTableOuter > div > table > tbody > tr").unwrap());
static TABLE_CELLS: Lazy<Selector> = Lazy::new(|| Selector::parse("td").unwrap());

const NO_ENTRIES: &str = "Keine Einträge";

pub async fn download(path: &Path, ilias: Arc<ILIAS>, url: &URL) -> Result<()> {
	if ilias.opt.no_videos {
		return Ok(());
	}
	let full_url = {
		// first find the link to full video list
		let list_url = format!("{}ilias.php?ref_id={}&cmdClass=xocteventgui&cmdNode=nc:n4:14u&baseClass=ilObjPluginDispatchGUI&lang=de&limit=20&cmd=asyncGetTableGUI&cmdMode=asynch", ILIAS_URL, url.ref_id);
		log!(1, "Loading {}", list_url);
		let data = ilias.download(&list_url).await?;
		let html = data.text().await?;
		let html = Html::parse_fragment(&html);
		html.select(&LINKS)
			.filter_map(|link| link.value().attr("href"))
			.filter(|href| href.contains("trows=800"))
			.map(|x| x.to_string())
			.next()
			.context("video list link not found")?
	};
	log!(1, "Rewriting {}", full_url);
	let mut full_url = Url::parse(&format!("{}{}", ILIAS_URL, full_url))?;
	let mut query_parameters = full_url
		.query_pairs()
		.map(|(x, y)| (x.into_owned(), y.into_owned()))
		.collect::<Vec<_>>();
	for (key, value) in &mut query_parameters {
		match key.as_ref() {
			"cmd" => *value = "asyncGetTableGUI".into(),
			"cmdClass" => *value = "xocteventgui".into(),
			_ => {},
		}
	}
	query_parameters.push(("cmdMode".into(), "asynch".into()));
	full_url
		.query_pairs_mut()
		.clear()
		.extend_pairs(&query_parameters)
		.finish();
	log!(1, "Loading {}", full_url);
	let data = ilias.download(full_url.as_str()).await?;
	let html = data.text().await?;
	let html = Html::parse_fragment(&html);
	for row in html.select(&VIDEO_ROWS) {
		let link = row.select(&A_TARGET_BLANK).next();
		if link.is_none() {
			if !row.text().any(|x| x == NO_ENTRIES) {
				warning!(format => "table row without link in {}", url.url);
			}
			continue;
		}
		let link = link.unwrap();
		let mut cells = row.select(&TABLE_CELLS);
		if let Some(title) = cells.nth(2) {
			let title = title.text().collect::<String>();
			let title = title.trim();
			if title.starts_with("<div") {
				continue;
			}
			let mut path = path.to_owned();
			path.push(format!("{}.mp4", file_escape(title)));
			log!(1, "Found video: {}", title);
			let video = Object::Video {
				url: URL::raw(link.value().attr("href").context("video link without href")?.to_owned()),
			};
			let ilias = Arc::clone(&ilias);
			spawn(process_gracefully(ilias, path, video));
		}
	}
	Ok(())
}
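Note on the URL rewriting above: the pairs are first collected into owned `String`s because `query_pairs()` borrows the `Url` immutably and could not be held across the later mutation; the edited list is then written back with `query_pairs_mut().clear().extend_pairs(..)`. A minimal standalone sketch of the same round trip, using the `url` crate (which `reqwest` re-exports as `reqwest::Url`); the example URL is made up:

use url::Url;

fn main() -> Result<(), url::ParseError> {
	let mut url = Url::parse("https://example.com/ilias.php?cmd=old&keep=1")?;

	// Detach the pairs into owned data first; query_pairs() borrows `url`.
	let mut pairs: Vec<(String, String)> = url
		.query_pairs()
		.map(|(k, v)| (k.into_owned(), v.into_owned()))
		.collect();
	for (key, value) in &mut pairs {
		if key == "cmd" {
			*value = "asyncGetTableGUI".into();
		}
	}
	pairs.push(("cmdMode".into(), "asynch".into()));

	// Rebuild the query string from the edited pairs.
	url.query_pairs_mut().clear().extend_pairs(&pairs).finish();
	assert_eq!(url.query(), Some("cmd=asyncGetTableGUI&keep=1&cmdMode=asynch"));
	Ok(())
}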
src/ilias/thread.rs (new file, 145 lines)
use std::{path::Path, sync::Arc};

use anyhow::{anyhow, Context, Result};
use once_cell::sync::Lazy;
use regex::Regex;
use scraper::Selector;

use crate::{
	handle_gracefully, process_gracefully,
	queue::spawn,
	util::{file_escape, write_file_data},
};

use super::{Object, ILIAS, URL};

static LINKS: Lazy<Selector> = Lazy::new(|| Selector::parse("a").unwrap());
static IMAGES: Lazy<Selector> = Lazy::new(|| Selector::parse("img").unwrap());
static TABLES: Lazy<Selector> = Lazy::new(|| Selector::parse("table").unwrap());
static LINK_IN_TABLE: Lazy<Selector> = Lazy::new(|| Selector::parse("tbody tr td a").unwrap());
static POST_ROW: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilFrmPostRow").unwrap());
static POST_TITLE: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilFrmPostTitle").unwrap());
static POST_CONTAINER: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilFrmPostContentContainer").unwrap());
static POST_ATTACHMENTS: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilFrmPostAttachmentsContainer").unwrap());
static SPAN_SMALL: Lazy<Selector> = Lazy::new(|| Selector::parse("span.small").unwrap());
static IMAGE_SRC_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#"\./data/produktiv/mobs/mm_(\d+)/([^?]+).+"#).unwrap());

pub async fn download(path: &Path, relative_path: &Path, ilias: Arc<ILIAS>, url: &URL) -> Result<()> {
	if !ilias.opt.forum {
		return Ok(());
	}
	let mut all_images = Vec::new();
	let mut attachments = Vec::new();
	{
		let html = ilias.get_html(&url.url).await?;
		for post in html.select(&POST_ROW) {
			let title = post
				.select(&POST_TITLE)
				.next()
				.context("post title not found")?
				.text()
				.collect::<String>();
			let author = post.select(&SPAN_SMALL).next().context("post author not found")?;
			let author = author.text().collect::<String>();
			let author = author.trim().split('|').collect::<Vec<_>>();
			let author = if author.len() == 2 {
				author[0] // pseudonymous forum
			} else if author.len() == 3 {
				if author[1] != "Pseudonym" {
					author[1]
				} else {
					author[0]
				}
			} else {
				return Err(anyhow!("author data in unknown format"));
			}
			.trim();
			let container = post
				.select(&POST_CONTAINER)
				.next()
				.context("post container not found")?;
			let link = container.select(&LINKS).next().context("post link not found")?;
			let id = link.value().attr("id").context("no id in thread link")?.to_owned();
			let name = format!("{}_{}_{}.html", id, author, title.trim());
			let data = container.inner_html();
			let path = path.join(file_escape(&name));
			let relative_path = relative_path.join(file_escape(&name));
			spawn(handle_gracefully(async move {
				log!(0, "Writing {}", relative_path.display());
				write_file_data(&path, &mut data.as_bytes())
					.await
					.context("failed to write forum post")
			}));
			let images = container
				.select(&IMAGES)
				.map(|x| x.value().attr("src").map(|x| x.to_owned()));
			for image in images {
				let image = image.context("no src on image")?;
				all_images.push((id.clone(), image));
			}
			if let Some(container) = container.select(&POST_ATTACHMENTS).next() {
				for attachment in container.select(&LINKS) {
					attachments.push((
						id.clone(),
						attachment.text().collect::<String>(),
						attachment.value().attr("href").map(|x| x.to_owned()),
					));
				}
			}
		}
		// pagination
		if let Some(pages) = html.select(&TABLES).next() {
			if let Some(last) = pages.select(&LINK_IN_TABLE).last() {
				let text = last.text().collect::<String>();
				if text.trim() == ">>" {
					// not last page yet
					let ilias = Arc::clone(&ilias);
					let next_page = Object::Thread {
						url: URL::from_href(last.value().attr("href").context("page link not found")?)?,
					};
					spawn(process_gracefully(ilias, path.to_owned(), next_page));
				}
			} else {
				log!(
					0,
					"Warning: {} {}",
					"unable to find pagination links in".bright_yellow(),
					url.url.to_string().bright_yellow()
				);
			}
		}
	}
	for (id, image) in all_images {
		let src = URL::from_href(&image)?;
		let dl = ilias.download(&src.url).await?;
		let mut path = path.to_owned();
		if let Some(m) = IMAGE_SRC_REGEX.captures(&image) {
			// image uploaded to ILIAS
			let (media_id, filename) = (m.get(1).unwrap().as_str(), m.get(2).unwrap().as_str());
			path.push(file_escape(&format!("{}_{}_{}", id, media_id, filename)));
		} else {
			// external image
			path.push(file_escape(&format!("{}_{}", id, image)));
		}
		spawn(handle_gracefully(async move {
			let bytes = dl.bytes().await?;
			write_file_data(&path, &mut &*bytes)
				.await
				.context("failed to write forum post image attachment")
		}));
	}
	for (id, name, url) in attachments {
		let url = url.context("attachment without href")?;
		let src = URL::from_href(&url)?;
		let dl = ilias.download(&src.url).await?;
		let mut path = path.to_owned();
		path.push(file_escape(&format!("{}_{}", id, name)));
		spawn(handle_gracefully(async move {
			let bytes = dl.bytes().await?;
			write_file_data(&path, &mut &*bytes)
				.await
				.context("failed to write forum post file attachment")
		}));
	}
	Ok(())
}
src/ilias/video.rs (new file, 57 lines)
use std::{path::Path, sync::Arc};

use anyhow::{Context, Result};
use once_cell::sync::Lazy;
use regex::Regex;
use tokio::fs;

use crate::{util::write_stream_to_file, ILIAS_URL};

use super::{ILIAS, URL};

static XOCT_REGEX: Lazy<Regex> =
	Lazy::new(|| Regex::new(r#"(?m)<script>\s+xoctPaellaPlayer\.init\(([\s\S]+)\)\s+</script>"#).unwrap());

pub async fn download(path: &Path, relative_path: &Path, ilias: Arc<ILIAS>, url: &URL) -> Result<()> {
	if ilias.opt.no_videos {
		return Ok(());
	}
	if fs::metadata(&path).await.is_ok() && !(ilias.opt.force || ilias.opt.check_videos) {
		log!(2, "Skipping download, file exists already");
		return Ok(());
	}
	let url = format!("{}{}", ILIAS_URL, url.url);
	let data = ilias.download(&url);
	let html = data.await?.text().await?;
	log!(2, "{}", html);
	let json: serde_json::Value = {
		let mut json_capture = XOCT_REGEX.captures_iter(&html);
		let json = &json_capture.next().context("xoct player json not found")?[1];
		log!(2, "{}", json);
		let json = json.split(",\n").next().context("invalid xoct player json")?;
		serde_json::from_str(&json.trim())?
	};
	log!(2, "{}", json);
	let url = json
		.pointer("/streams/0/sources/mp4/0/src")
		.context("video src not found")?
		.as_str()
		.context("video src not string")?;
	let meta = fs::metadata(&path).await;
	if !ilias.opt.force && meta.is_ok() && ilias.opt.check_videos {
		let head = ilias.head(url).await.context("HEAD request failed")?;
		if let Some(len) = head.headers().get("content-length") {
			if meta?.len() != len.to_str()?.parse::<u64>()? {
				warning!(
					relative_path.to_string_lossy(),
					"was updated, consider moving the outdated file"
				);
			}
		}
	} else {
		let resp = ilias.download(&url).await?;
		log!(0, "Writing {}", relative_path.to_string_lossy());
		write_stream_to_file(&path, resp.bytes_stream()).await?;
	}
	Ok(())
}
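Note on the `pointer` lookup above: `serde_json::Value::pointer` takes an RFC 6901 JSON Pointer, in which numeric tokens index into arrays, so `/streams/0/sources/mp4/0/src` walks streams[0].sources.mp4[0].src. A standalone sketch with a made-up payload shaped like the player config:

use serde_json::json;

fn main() {
	// Hypothetical payload shaped like the xoctPaellaPlayer config above.
	let player = json!({
		"streams": [{
			"sources": {
				"mp4": [{ "src": "https://example.com/video.mp4" }]
			}
		}]
	});

	// JSON Pointer: object keys and array indices separated by '/'.
	let src = player
		.pointer("/streams/0/sources/mp4/0/src")
		.and_then(|v| v.as_str());
	assert_eq!(src, Some("https://example.com/video.mp4"));
}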
src/ilias/weblink.rs (new file, 70 lines)
use std::{path::Path, sync::Arc};

use anyhow::{Context, Result};
use once_cell::sync::Lazy;
use scraper::Selector;
use tokio::fs;

use crate::{
	util::{create_dir, file_escape, write_file_data},
	ILIAS_URL,
};

use super::{ILIAS, URL};

static LINKS: Lazy<Selector> = Lazy::new(|| Selector::parse("a").unwrap());

pub async fn download(path: &Path, relative_path: &Path, ilias: Arc<ILIAS>, url: &URL) -> Result<()> {
	if !ilias.opt.force && fs::metadata(&path).await.is_ok() {
		log!(2, "Skipping download, link exists already");
		return Ok(());
	}
	let head_req_result = ilias.head(&url.url).await;
	let url = match &head_req_result {
		Err(e) => e.url().context("HEAD request failed")?.as_str(),
		Ok(head) => head.url().as_str(),
	};
	if url.starts_with(ILIAS_URL) {
		// is a link list
		if fs::metadata(&path).await.is_err() {
			create_dir(&path).await?;
			log!(0, "Writing {}", relative_path.to_string_lossy());
		}

		let urls = {
			let html = ilias.get_html(url).await?;
			html.select(&LINKS)
				.filter_map(|x| x.value().attr("href").map(|y| (y, x.text().collect::<String>())))
				.map(|(x, y)| {
					URL::from_href(x)
						.map(|z| (z, y.trim().to_owned()))
						.context("parsing weblink")
				})
				.collect::<Result<Vec<_>>>()
		}?;

		for (url, name) in urls {
			if url.cmd.as_deref().unwrap_or("") != "callLink" {
				continue;
			}

			let head = ilias
				.head(url.url.as_str())
				.await
				.context("HEAD request to web link failed");
			if let Some(err) = head.as_ref().err() {
				warning!(err);
				continue;
			}
			let head = head.unwrap();
			let url = head.url().as_str();
			write_file_data(path.join(file_escape(&name)), &mut url.as_bytes()).await?;
		}
	} else {
		log!(0, "Writing {}", relative_path.to_string_lossy());
		write_file_data(&path, &mut url.as_bytes())
			.await
			.context("failed to save weblink URL")?;
	}
	Ok(())
}
src/main.rs (555 lines changed)
@@ -1,26 +1,14 @@
 // SPDX-License-Identifier: GPL-3.0-or-later

-#![allow(clippy::upper_case_acronyms)]
-
 use anyhow::{anyhow, Context, Result};
-use colored::Colorize;
 use futures::future::{self, Either};
-use futures_channel::mpsc::UnboundedSender;
-use futures_util::stream::TryStreamExt;
-use futures_util::StreamExt;
+use futures::StreamExt;
 use ignore::gitignore::Gitignore;
 use indicatif::{ProgressDrawTarget, ProgressStyle};
-use once_cell::sync::{Lazy, OnceCell};
-use scraper::Html;
 use structopt::StructOpt;
-use tokio::task::{self, JoinHandle};
-use tokio::{fs, sync::Semaphore, time};
-use tokio_util::io::StreamReader;
-use url::Url;
+use tokio::fs;

-use std::collections::HashSet;
 use std::future::Future;
-use std::io;
 use std::io::BufReader;
 use std::path::PathBuf;
 use std::sync::atomic::Ordering;
@@ -29,7 +17,8 @@ use std::time::SystemTime;

 static ILIAS_URL: &str = "https://ilias.studium.kit.edu/";
 /// main personal desktop
-static DEFAULT_SYNC_URL: &str = "https://ilias.studium.kit.edu/ilias.php?baseClass=ilPersonalDesktopGUI&cmd=jumpToSelectedItems";
+static DEFAULT_SYNC_URL: &str =
+	"https://ilias.studium.kit.edu/ilias.php?baseClass=ilPersonalDesktopGUI&cmd=jumpToSelectedItems";

 #[macro_use]
 mod cli;
@@ -37,35 +26,13 @@ use cli::*;
 mod ilias;
 use ilias::*;
 use Object::*;
+mod queue;
 mod util;
 use util::*;

-/// Global job queue
-static TASKS: OnceCell<UnboundedSender<JoinHandle<()>>> = OnceCell::new();
-static TASKS_RUNNING: Lazy<Semaphore> = Lazy::new(|| Semaphore::new(0));
-static REQUEST_TICKETS: Lazy<Semaphore> = Lazy::new(|| Semaphore::new(0));
-
-pub async fn get_request_ticket() {
-	REQUEST_TICKETS.acquire().await.unwrap().forget();
-}
-
-macro_rules! spawn {
-	($e:expr) => {
-		TASKS.get().unwrap().unbounded_send(task::spawn($e)).unwrap();
-	};
-}
-
 #[tokio::main]
 async fn main() {
 	let opt = Opt::from_args();
-	let rate = opt.rate;
-	task::spawn(async move {
-		let mut interval = time::interval(time::Duration::from_secs_f64(60.0 / rate as f64));
-		loop {
-			interval.tick().await;
-			REQUEST_TICKETS.add_permits(1);
-		}
-	});
 	if let Err(e) = real_main(opt).await {
 		error!(e);
 	}
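Note: src/queue.rs itself is not included in this diff, so its contents can only be inferred from the call sites (`set_download_rate`, `set_parallel_jobs`, `spawn`, `get_ticket`, `get_request_ticket`) and from the statics it replaces here. The rate-limiting half is sketched earlier next to `ILIAS::download`; what follows is a hedged reconstruction of the job-queue half, assuming it keeps the removed unbounded-channel-plus-semaphore design. It is a sketch, not the real file (in particular, whether `get_ticket` is async is a guess):

// Hypothetical reconstruction of the job-queue side of src/queue.rs,
// based only on the call sites visible in this diff.
use futures_channel::mpsc::{self, UnboundedReceiver, UnboundedSender};
use once_cell::sync::{Lazy, OnceCell};
use std::future::Future;
use tokio::{
	sync::{Semaphore, SemaphorePermit},
	task::{self, JoinHandle},
};

static TASKS: OnceCell<UnboundedSender<JoinHandle<()>>> = OnceCell::new();
static TASKS_RUNNING: Lazy<Semaphore> = Lazy::new(|| Semaphore::new(0));

/// Create the channel main() drains and allow `jobs` tasks to run at once.
pub fn set_parallel_jobs(jobs: usize) -> UnboundedReceiver<JoinHandle<()>> {
	let (tx, rx) = mpsc::unbounded();
	TASKS.get_or_init(|| tx);
	TASKS_RUNNING.add_permits(jobs);
	rx
}

/// Spawn a download task and register its handle so main() can await it.
pub fn spawn(e: impl Future<Output = ()> + Send + 'static) {
	TASKS.get().unwrap().unbounded_send(task::spawn(e)).unwrap();
}

/// Acquire one of the `jobs` concurrency permits (held for the task's duration).
pub async fn get_ticket() -> SemaphorePermit<'static> {
	TASKS_RUNNING.acquire().await.unwrap()
}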
@@ -105,7 +72,7 @@ async fn login(opt: Opt, ignore: Gitignore) -> Result<ILIAS> {
 				error!(e)
 			} else {
 				success!("Session still active!");
-				return Ok(ilias)
+				return Ok(ilias);
 			}
 		},
 		Err(e) => warning!(e),
@@ -141,9 +108,13 @@ async fn real_main(mut opt: Opt) -> Result<()> {
 	#[cfg(windows)]
 	let _ = colored::control::set_virtual_terminal(true);

-	create_dir(&opt.output).await.context("failed to create output directory")?;
+	create_dir(&opt.output)
+		.await
+		.context("failed to create output directory")?;
 	// use UNC paths on Windows (to avoid the default max. path length of 255)
-	opt.output = fs::canonicalize(opt.output).await.context("failed to canonicalize output directory")?;
+	opt.output = fs::canonicalize(opt.output)
+		.await
+		.context("failed to canonicalize output directory")?;

 	// load .iliasignore file
 	let (ignore, error) = Gitignore::new(opt.output.join(".iliasignore"));
@@ -151,6 +122,8 @@ async fn real_main(mut opt: Opt) -> Result<()> {
 		warning!(err);
 	}

+	queue::set_download_rate(opt.rate);
+
 	let ilias = login(opt, ignore).await?;

 	if ilias.opt.content_tree {
@@ -162,9 +135,7 @@ async fn real_main(mut opt: Opt) -> Result<()> {
 		}
 	}
 	let ilias = Arc::new(ilias);
-	let (tx, mut rx) = futures_channel::mpsc::unbounded::<JoinHandle<()>>();
-	TASKS.get_or_init(|| tx.clone());
-	TASKS_RUNNING.add_permits(ilias.opt.jobs);
+	let mut rx = queue::set_parallel_jobs(ilias.opt.jobs);
 	PROGRESS_BAR_ENABLED.store(atty::is(atty::Stream::Stdout), Ordering::SeqCst);
 	if PROGRESS_BAR_ENABLED.load(Ordering::SeqCst) {
 		PROGRESS_BAR.set_draw_target(ProgressDrawTarget::stderr_nohz());
@@ -173,8 +144,13 @@ async fn real_main(mut opt: Opt) -> Result<()> {
 	}

 	let sync_url = ilias.opt.sync_url.as_deref().unwrap_or(DEFAULT_SYNC_URL);
-	let obj = Object::from_url(URL::from_href(sync_url).context("invalid sync URL")?, String::new(), None).context("invalid sync object")?; // name can be empty for first element
-	spawn!(process_gracefully(ilias.clone(), ilias.opt.output.clone(), obj));
+	let obj = Object::from_url(
+		URL::from_href(sync_url).context("invalid sync URL")?,
+		String::new(),
+		None,
+	)
+	.context("invalid sync object")?;
+	queue::spawn(process_gracefully(ilias.clone(), ilias.opt.output.clone(), obj));

 	while let Either::Left((task, _)) = future::select(rx.next(), future::ready(())).await {
 		if let Some(task) = task {
@@ -208,11 +184,11 @@
 // https://github.com/rust-lang/rust/issues/53690#issuecomment-418911229
 #[allow(clippy::manual_async_fn)]
 fn process_gracefully(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> impl Future<Output = ()> + Send {
+	if PROGRESS_BAR_ENABLED.load(Ordering::SeqCst) {
+		PROGRESS_BAR.inc_length(1);
+	}
 	async move {
-		if PROGRESS_BAR_ENABLED.load(Ordering::SeqCst) {
-			PROGRESS_BAR.inc_length(1);
-		}
-		let permit = TASKS_RUNNING.acquire().await.unwrap();
+		let permit = queue::get_ticket();
 		let path_text = path.to_string_lossy().into_owned();
 		if let Err(e) = process(ilias, path, obj).await.context("failed to process URL") {
 			error!("Syncing {}", path_text; e);
@@ -227,46 +203,9 @@ async fn handle_gracefully(fut: impl Future<Output = Result<()>>) {
 	}
 }

-#[allow(non_upper_case_globals)]
-mod selectors {
-	use once_cell::sync::Lazy;
-	use regex::Regex;
-	use scraper::Selector;
-	// construct CSS selectors once
-	pub static LINKS: Lazy<Selector> = Lazy::new(|| Selector::parse("a").unwrap());
-	pub static a_target_blank: Lazy<Selector> = Lazy::new(|| Selector::parse(r#"a[target="_blank"]"#).unwrap());
-	pub static IMAGES: Lazy<Selector> = Lazy::new(|| Selector::parse("img").unwrap());
-	pub static TABLES: Lazy<Selector> = Lazy::new(|| Selector::parse("table").unwrap());
-	pub static VIDEO_ROWS: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilTableOuter > div > table > tbody > tr").unwrap());
-	pub static links_in_table: Lazy<Selector> = Lazy::new(|| Selector::parse("tbody tr td a").unwrap());
-	pub static th: Lazy<Selector> = Lazy::new(|| Selector::parse("th").unwrap());
-	pub static td: Lazy<Selector> = Lazy::new(|| Selector::parse("td").unwrap());
-	pub static tr: Lazy<Selector> = Lazy::new(|| Selector::parse("tr").unwrap());
-	pub static post_row: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilFrmPostRow").unwrap());
-	pub static post_title: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilFrmPostTitle").unwrap());
-	pub static post_container: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilFrmPostContentContainer").unwrap());
-	pub static post_attachments: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilFrmPostAttachmentsContainer").unwrap());
-	pub static span_small: Lazy<Selector> = Lazy::new(|| Selector::parse("span.small").unwrap());
-	pub static forum_pages: Lazy<Selector> = Lazy::new(|| Selector::parse("div.ilTableNav > table > tbody > tr > td > a").unwrap());
-	pub static alert_danger: Lazy<Selector> = Lazy::new(|| Selector::parse("div.alert-danger").unwrap());
-	pub static form_group: Lazy<Selector> = Lazy::new(|| Selector::parse(".form-group").unwrap());
-	pub static form_name: Lazy<Selector> = Lazy::new(|| Selector::parse(".il_InfoScreenProperty").unwrap());
-	pub static cmd_node_regex: Lazy<Regex> = Lazy::new(|| Regex::new(r#"cmdNode=uf:\w\w"#).unwrap());
-	pub static image_src_regex: Lazy<Regex> = Lazy::new(|| Regex::new(r#"\./data/produktiv/mobs/mm_(\d+)/([^?]+).+"#).unwrap());
-	pub static XOCT_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#"(?m)<script>\s+xoctPaellaPlayer\.init\(([\s\S]+)\)\s+</script>"#).unwrap());
-	pub static il_content_container: Lazy<Selector> = Lazy::new(|| Selector::parse("#il_center_col").unwrap());
-	pub static item_prop: Lazy<Selector> = Lazy::new(|| Selector::parse("span.il_ItemProperty").unwrap());
-	pub static container_items: Lazy<Selector> = Lazy::new(|| Selector::parse("div.il_ContainerListItem").unwrap());
-	pub static container_item_title: Lazy<Selector> = Lazy::new(|| Selector::parse("a.il_ContainerItemTitle").unwrap());
-}
-use crate::selectors::*;
-
-const NO_ENTRIES: &str = "Keine Einträge";
-
 async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 	let relative_path = path.strip_prefix(&ilias.opt.output).unwrap();
 	if PROGRESS_BAR_ENABLED.load(Ordering::SeqCst) {
-		PROGRESS_BAR.inc(1);
 		let path = relative_path.display().to_string();
 		if !path.is_empty() {
 			PROGRESS_BAR.set_message(path);
@ -284,447 +223,31 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
|
|||||||
 	}
 	match &obj {
 		Course { url, name } => {
-			let content = if ilias.opt.content_tree {
-				let html = ilias.download(&url.url).await?.text().await?;
-				let cmd_node = cmd_node_regex.find(&html).context("can't find cmdNode")?.as_str()[8..].to_owned();
-				let content_tree = ilias.get_course_content_tree(&url.ref_id, &cmd_node).await;
-				match content_tree {
-					Ok(tree) => (tree.into_iter().map(Result::Ok).collect(), None),
-					Err(e) => {
-						// some folders are hidden on the course page and can only be found via the RSS feed / recent activity / content tree sidebar
-						// TODO: this is probably never the case for folders?
-						if html.contains(r#"input[name="cmd[join]""#) {
-							return Ok(()); // ignore groups we are not in
-						}
-						warning!(name, "falling back to incomplete course content extractor!", e);
-						ilias.get_course_content(&url).await? // TODO: perhaps don't download almost the same content 3x
-					},
-				}
-			} else {
-				ilias.get_course_content(&url).await?
-			};
-			if let Some(s) = content.1.as_ref() {
-				let path = path.join("course.html");
-				write_file_data(&path, &mut s.as_bytes())
-					.await
-					.context("failed to write course page html")?;
-			}
-			for item in content.0 {
-				let item = item?;
-				let path = path.join(file_escape(item.name()));
-				let ilias = Arc::clone(&ilias);
-				spawn!(process_gracefully(ilias, path, item));
-			}
+			ilias::course::download(path, ilias, url, name).await?;
 		},
 		Folder { url, .. } | PersonalDesktop { url } => {
-			let content = ilias.get_course_content(&url).await?;
-			if let Some(s) = content.1.as_ref() {
-				let path = path.join("folder.html");
-				write_file_data(&path, &mut s.as_bytes())
-					.await
-					.context("failed to write folder page html")?;
-			}
-			for item in content.0 {
-				let item = item?;
-				let path = path.join(file_escape(item.name()));
-				let ilias = Arc::clone(&ilias);
-				spawn!(process_gracefully(ilias, path, item));
-			}
+			ilias::folder::download(&path, ilias, url).await?;
 		},
 		File { url, .. } => {
-			if ilias.opt.skip_files {
-				return Ok(());
-			}
-			if !ilias.opt.force && fs::metadata(&path).await.is_ok() {
-				log!(2, "Skipping download, file exists already");
-				return Ok(());
-			}
-			let data = ilias.download(&url.url).await?;
-			let mut reader = StreamReader::new(data.bytes_stream().map_err(|x| io::Error::new(io::ErrorKind::Other, x)));
-			log!(0, "Writing {}", relative_path.to_string_lossy());
-			write_file_data(&path, &mut reader).await?;
+			ilias::file::download(&path, relative_path, ilias, url).await?;
 		},
 		PluginDispatch { url, .. } => {
-			if ilias.opt.no_videos {
-				return Ok(());
-			}
-			let full_url = {
-				// first find the link to full video list
-				let list_url = format!("{}ilias.php?ref_id={}&cmdClass=xocteventgui&cmdNode=nc:n4:14u&baseClass=ilObjPluginDispatchGUI&lang=de&limit=20&cmd=asyncGetTableGUI&cmdMode=asynch", ILIAS_URL, url.ref_id);
-				log!(1, "Loading {}", list_url);
-				let data = ilias.download(&list_url).await?;
-				let html = data.text().await?;
-				let html = Html::parse_fragment(&html);
-				html.select(&LINKS)
-					.filter_map(|link| link.value().attr("href"))
-					.filter(|href| href.contains("trows=800"))
-					.map(|x| x.to_string())
-					.next()
-					.context("video list link not found")?
-			};
-			log!(1, "Rewriting {}", full_url);
-			let mut full_url = Url::parse(&format!("{}{}", ILIAS_URL, full_url))?;
-			let mut query_parameters = full_url.query_pairs().map(|(x, y)| (x.into_owned(), y.into_owned())).collect::<Vec<_>>();
-			for (key, value) in &mut query_parameters {
-				match key.as_ref() {
-					"cmd" => *value = "asyncGetTableGUI".into(),
-					"cmdClass" => *value = "xocteventgui".into(),
-					_ => {},
-				}
-			}
-			query_parameters.push(("cmdMode".into(), "asynch".into()));
-			full_url.query_pairs_mut().clear().extend_pairs(&query_parameters).finish();
-			log!(1, "Loading {}", full_url);
-			let data = ilias.download(full_url.as_str()).await?;
-			let html = data.text().await?;
-			let html = Html::parse_fragment(&html);
-			for row in html.select(&VIDEO_ROWS) {
-				let link = row.select(&a_target_blank).next();
-				if link.is_none() {
-					if !row.text().any(|x| x == NO_ENTRIES) {
-						warning!(format => "table row without link in {}", url.url);
-					}
-					continue;
-				}
-				let link = link.unwrap();
-				let mut cells = row.select(&td);
-				if let Some(title) = cells.nth(2) {
-					let title = title.text().collect::<String>();
-					let title = title.trim();
-					if title.starts_with("<div") {
-						continue;
-					}
-					let mut path = path.clone();
-					path.push(format!("{}.mp4", file_escape(title)));
-					log!(1, "Found video: {}", title);
-					let video = Video {
-						url: URL::raw(link.value().attr("href").context("video link without href")?.to_owned()),
-					};
-					let ilias = Arc::clone(&ilias);
-					spawn!(process_gracefully(ilias, path, video));
-				}
-			}
+			ilias::plugin_dispatch::download(&path, ilias, url).await?;
 		},
 		Video { url } => {
-			if ilias.opt.no_videos {
-				return Ok(());
-			}
-			if fs::metadata(&path).await.is_ok() && !(ilias.opt.force || ilias.opt.check_videos) {
-				log!(2, "Skipping download, file exists already");
-				return Ok(());
-			}
-			let url = format!("{}{}", ILIAS_URL, url.url);
-			let data = ilias.download(&url);
-			let html = data.await?.text().await?;
-			log!(2, "{}", html);
-			let json: serde_json::Value = {
-				let mut json_capture = XOCT_REGEX.captures_iter(&html);
-				let json = &json_capture.next().context("xoct player json not found")?[1];
-				log!(2, "{}", json);
-				let json = json.split(",\n").next().context("invalid xoct player json")?;
-				serde_json::from_str(&json.trim())?
-			};
-			log!(2, "{}", json);
-			let url = json
-				.pointer("/streams/0/sources/mp4/0/src")
-				.context("video src not found")?
-				.as_str()
-				.context("video src not string")?;
-			let meta = fs::metadata(&path).await;
-			if !ilias.opt.force && meta.is_ok() && ilias.opt.check_videos {
-				let head = ilias.head(url).await.context("HEAD request failed")?;
-				if let Some(len) = head.headers().get("content-length") {
-					if meta?.len() != len.to_str()?.parse::<u64>()? {
-						warning!(relative_path.to_string_lossy(), "was updated, consider moving the outdated file");
-					}
-				}
-			} else {
-				let resp = ilias.download(&url).await?;
-				let mut reader = StreamReader::new(resp.bytes_stream().map_err(|x| io::Error::new(io::ErrorKind::Other, x)));
-				log!(0, "Writing {}", relative_path.to_string_lossy());
-				write_file_data(&path, &mut reader).await?;
-			}
+			ilias::video::download(&path, relative_path, ilias, url).await?;
 		},
 		Forum { url, .. } => {
-			if !ilias.opt.forum {
-				return Ok(());
-			}
-			let url = &url.url;
-			let html = {
-				let data = ilias.download(url);
-				let html_text = data.await?.text().await?;
-				let url = {
-					let html = Html::parse_document(&html_text);
-					let thread_count_selector = html.select(&LINKS).flat_map(|x| x.value().attr("href")).find(|x| x.contains("trows=800"));
-					if thread_count_selector.is_none() {
-						if let Some(cell) = html.select(&td).next() {
-							if cell.text().any(|x| x == NO_ENTRIES) {
-								return Ok(()); // empty forum
-							}
-						}
-					}
-					thread_count_selector
-						.context("can't find forum thread count selector (empty forum?)")?
-						.to_owned()
-				};
-				let data = ilias.download(&url);
-				let html = data.await?.text().await?;
-				Html::parse_document(&html)
-			};
-			for row in html.select(&tr) {
-				if row.value().attr("class") == Some("hidden-print") {
-					continue; // thread count
-				}
-				if row.select(&th).next().is_some() {
-					continue; // table header
-				}
-				let cells = row.select(&td).collect::<Vec<_>>();
-				if cells.len() != 6 {
-					warning!(format =>
-						"Warning: {}{} {} {}",
-						"unusual table row (", cells.len(), "cells) in", url.to_string()
-					);
-					continue;
-				}
-				let link = cells[1].select(&LINKS).next().context("thread link not found")?;
-				let object = Object::from_link(link, link)?;
-				let mut path = path.clone();
-				let name = format!(
-					"{}_{}",
-					object.url().thr_pk.as_ref().context("thr_pk not found for thread")?,
-					link.text().collect::<String>().trim()
-				);
-				path.push(file_escape(&name));
-				// TODO: set modification date?
-				let saved_posts = {
-					match std::fs::read_dir(&path) {
-						// TODO: make this async
-						Ok(stream) => stream.count(),
-						Err(_) => 0,
-					}
-				};
-				let available_posts = cells[3]
-					.text()
-					.next()
-					.unwrap_or_default()
-					.trim()
-					.parse::<usize>()
-					.context("parsing post count failed")?;
-				if available_posts <= saved_posts && !ilias.opt.force {
-					continue;
-				}
-				let ilias = Arc::clone(&ilias);
-				spawn!(process_gracefully(ilias, path, object));
-			}
-			if html.select(&forum_pages).count() > 0 {
-				log!(0, "Ignoring older threads in {:?}..", path);
-			}
+			ilias::forum::download(&path, ilias, url).await?;
 		},
 		Thread { url } => {
-			if !ilias.opt.forum {
-				return Ok(());
-			}
-			let mut all_images = Vec::new();
-			let mut attachments = Vec::new();
-			{
-				let html = ilias.get_html(&url.url).await?;
-				for post in html.select(&post_row) {
-					let title = post
-						.select(&post_title)
-						.next()
-						.context("post title not found")?
-						.text()
-						.collect::<String>();
-					let author = post.select(&span_small).next().context("post author not found")?;
-					let author = author.text().collect::<String>();
-					let author = author.trim().split('|').collect::<Vec<_>>();
-					let author = if author.len() == 2 {
-						author[0] // pseudonymous forum
-					} else if author.len() == 3 {
-						if author[1] != "Pseudonym" {
-							author[1]
-						} else {
-							author[0]
-						}
-					} else {
-						return Err(anyhow!("author data in unknown format"));
-					}
-					.trim();
-					let container = post.select(&post_container).next().context("post container not found")?;
-					let link = container.select(&LINKS).next().context("post link not found")?;
-					let id = link.value().attr("id").context("no id in thread link")?.to_owned();
-					let name = format!("{}_{}_{}.html", id, author, title.trim());
-					let data = container.inner_html();
-					let path = path.join(file_escape(&name));
-					let relative_path = relative_path.join(file_escape(&name));
-					spawn!(handle_gracefully(async move {
-						log!(0, "Writing {}", relative_path.display());
-						write_file_data(&path, &mut data.as_bytes()).await.context("failed to write forum post")
-					}));
-					let images = container.select(&IMAGES).map(|x| x.value().attr("src").map(|x| x.to_owned()));
-					for image in images {
-						let image = image.context("no src on image")?;
-						all_images.push((id.clone(), image));
-					}
-					if let Some(container) = container.select(&post_attachments).next() {
-						for attachment in container.select(&LINKS) {
-							attachments.push((
-								id.clone(),
-								attachment.text().collect::<String>(),
-								attachment.value().attr("href").map(|x| x.to_owned()),
-							));
-						}
-					}
-				}
-				// pagination
-				if let Some(pages) = html.select(&TABLES).next() {
-					if let Some(last) = pages.select(&links_in_table).last() {
-						let text = last.text().collect::<String>();
-						if text.trim() == ">>" {
-							// not last page yet
-							let ilias = Arc::clone(&ilias);
-							let next_page = Thread {
-								url: URL::from_href(last.value().attr("href").context("page link not found")?)?,
-							};
-							spawn!(process_gracefully(ilias, path.clone(), next_page));
-						}
-					} else {
-						log!(
-							0,
-							"Warning: {} {}",
-							"unable to find pagination links in".bright_yellow(),
-							url.url.to_string().bright_yellow()
-						);
-					}
-				}
-			}
-			for (id, image) in all_images {
-				let src = URL::from_href(&image)?;
-				let dl = ilias.download(&src.url).await?;
-				let mut path = path.clone();
-				if let Some(m) = image_src_regex.captures(&image) {
-					// image uploaded to ILIAS
-					let (media_id, filename) = (m.get(1).unwrap().as_str(), m.get(2).unwrap().as_str());
-					path.push(file_escape(&format!("{}_{}_{}", id, media_id, filename)));
-				} else {
-					// external image
-					path.push(file_escape(&format!("{}_{}", id, image)));
-				}
-				spawn!(handle_gracefully(async move {
-					let bytes = dl.bytes().await?;
-					write_file_data(&path, &mut &*bytes)
-						.await
-						.context("failed to write forum post image attachment")
-				}));
-			}
-			for (id, name, url) in attachments {
-				let url = url.context("attachment without href")?;
-				let src = URL::from_href(&url)?;
-				let dl = ilias.download(&src.url).await?;
-				let mut path = path.clone();
-				path.push(file_escape(&format!("{}_{}", id, name)));
-				spawn!(handle_gracefully(async move {
-					let bytes = dl.bytes().await?;
-					write_file_data(&path, &mut &*bytes)
-						.await
-						.context("failed to write forum post file attachment")
-				}));
-			}
+			ilias::thread::download(&path, relative_path, ilias, url).await?;
 		},
 		ExerciseHandler { url, .. } => {
-			let html = ilias.get_html(&url.url).await?;
-			let mut filenames = HashSet::new();
-			for row in html.select(&form_group) {
-				let link = row.select(&LINKS).next();
-				if link.is_none() {
-					continue;
-				}
-				let link = link.unwrap();
-				let href = link.value().attr("href");
-				if href.is_none() {
-					continue;
-				}
-				let href = href.unwrap();
-				let url = URL::from_href(href)?;
-				let cmd = url.cmd.as_deref().unwrap_or("");
-				if cmd != "downloadFile" && cmd != "downloadGlobalFeedbackFile" && cmd != "downloadFeedbackFile" {
-					continue;
-				}
-				// link is definitely just a download link to the exercise or the solution
-				let name = row
-					.select(&form_name)
-					.next()
-					.context("link without file name")?
-					.text()
-					.collect::<String>()
-					.trim()
-					.to_owned();
-				let item = File { url, name };
-				let mut path = path.clone();
-				// handle files with the same name
-				let filename = file_escape(item.name());
-				let mut parts = filename.rsplitn(2, '.');
-				let extension = parts.next().unwrap_or(&filename);
-				let name = parts.next().unwrap_or("");
-				let mut unique_filename = filename.clone();
-				let mut i = 1;
-				while filenames.contains(&unique_filename) {
-					i += 1;
-					if name.is_empty() {
-						unique_filename = format!("{}{}", extension, i);
-					} else {
-						unique_filename = format!("{}{}.{}", name, i, extension);
-					}
-				}
-				filenames.insert(unique_filename.clone());
-				path.push(unique_filename);
-				let ilias = Arc::clone(&ilias);
-				spawn!(process_gracefully(ilias, path, item));
-			}
+			ilias::exercise::download(&path, ilias, url).await?;
 		},
 		Weblink { url, .. } => {
-			if !ilias.opt.force && fs::metadata(&path).await.is_ok() {
-				log!(2, "Skipping download, link exists already");
-				return Ok(());
-			}
-			let head_req_result = ilias.head(&url.url).await;
-			let url = match &head_req_result {
-				Err(e) => e.url().context("HEAD request failed")?.as_str(),
-				Ok(head) => head.url().as_str(),
-			};
-			if url.starts_with(ILIAS_URL) {
-				// is a link list
-				if fs::metadata(&path).await.is_err() {
-					create_dir(&path).await?;
-					log!(0, "Writing {}", relative_path.to_string_lossy());
-				}
-
-				let urls = {
-					let html = ilias.get_html(url).await?;
-					html.select(&LINKS)
-						.filter_map(|x| x.value().attr("href").map(|y| (y, x.text().collect::<String>())))
-						.map(|(x, y)| URL::from_href(x).map(|z| (z, y.trim().to_owned())).context("parsing weblink"))
-						.collect::<Result<Vec<_>>>()
-				}?;
-
-				for (url, name) in urls {
-					if url.cmd.as_deref().unwrap_or("") != "callLink" {
-						continue;
-					}
-
-					let head = ilias.head(url.url.as_str()).await.context("HEAD request to web link failed");
-					if let Some(err) = head.as_ref().err() {
-						warning!(err);
-						continue;
-					}
-					let head = head.unwrap();
-					let url = head.url().as_str();
-					write_file_data(path.join(file_escape(&name)), &mut url.as_bytes()).await?;
-				}
-			} else {
-				log!(0, "Writing {}", relative_path.to_string_lossy());
-				write_file_data(&path, &mut url.as_bytes()).await.context("failed to save weblink URL")?;
-			}
+			ilias::weblink::download(&path, relative_path, ilias, url).await?;
 		},
 		Wiki { .. } => {
 			log!(1, "Ignored wiki!");
@@ -733,11 +256,17 @@ async fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> Result<()> {
 			log!(1, "Ignored survey!");
 		},
 		Presentation { .. } => {
-			log!(1, "Ignored interactive presentation! (visit it yourself, it's probably interesting)");
+			log!(
+				1,
+				"Ignored interactive presentation! (visit it yourself, it's probably interesting)"
+			);
 		},
 		Generic { .. } => {
 			log!(1, "Ignored generic {:?}", obj)
 		},
 	}
+	if PROGRESS_BAR_ENABLED.load(Ordering::SeqCst) {
+		PROGRESS_BAR.inc(1);
+	}
 	Ok(())
 }
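
Each match arm above now delegates to a dedicated module under src/ilias/. For orientation, the deleted Folder arm maps onto a module of roughly the following shape. This is a sketch reconstructed from the removed code, not the committed file; the module path, function signature, and import paths are assumptions:

// Hypothetical src/ilias/folder.rs, reconstructed from the deleted Folder arm above.
use std::{path::Path, sync::Arc};

use anyhow::{Context, Result};

// Assumed import paths; the real crate layout may differ.
use crate::{process_gracefully, queue::spawn, util::{file_escape, write_file_data}, ILIAS, URL};

pub async fn download(path: &Path, ilias: Arc<ILIAS>, url: &URL) -> Result<()> {
	let content = ilias.get_course_content(url).await?;
	// Save the rendered folder page next to its contents.
	if let Some(s) = content.1.as_ref() {
		let path = path.join("folder.html");
		write_file_data(&path, &mut s.as_bytes())
			.await
			.context("failed to write folder page html")?;
	}
	// Queue every child object as its own download job.
	for item in content.0 {
		let item = item?;
		let path = path.join(file_escape(item.name()));
		let ilias = Arc::clone(&ilias);
		spawn(process_gracefully(ilias, path, item));
	}
	Ok(())
}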
50
src/queue.rs
Normal file
@@ -0,0 +1,50 @@
+use futures::Future;
+use futures_channel::mpsc::{UnboundedReceiver, UnboundedSender};
+use once_cell::sync::{Lazy, OnceCell};
+use tokio::{
+	sync::{Semaphore, SemaphorePermit},
+	task::{self, JoinHandle},
+	time,
+};
+
+/// Global job queue
+static TASKS: OnceCell<UnboundedSender<JoinHandle<()>>> = OnceCell::new();
+static TASKS_RUNNING: Lazy<Semaphore> = Lazy::new(|| Semaphore::new(0));
+static REQUEST_TICKETS: Lazy<Semaphore> = Lazy::new(|| Semaphore::new(0));
+
+pub async fn get_request_ticket() {
+	REQUEST_TICKETS.acquire().await.unwrap().forget();
+}
+
+pub async fn get_ticket() -> SemaphorePermit<'static> {
+	TASKS_RUNNING.acquire().await.unwrap()
+}
+
+pub fn spawn(e: impl Future<Output = ()> + Send + 'static) {
+	TASKS.get().unwrap().unbounded_send(task::spawn(e)).unwrap();
+}
+
+pub fn set_download_rate(rate: usize) {
+	task::spawn(async move {
+		let mut interval = time::interval(time::Duration::from_secs_f64(60.0 / rate as f64));
+		loop {
+			interval.tick().await;
+			log!(
+				0,
+				"interval ticked @ {}",
+				std::time::SystemTime::now()
+					.duration_since(std::time::SystemTime::UNIX_EPOCH)
+					.unwrap()
+					.as_secs()
+			);
+			REQUEST_TICKETS.add_permits(1);
+		}
+	});
+}
+
+pub fn set_parallel_jobs(jobs: usize) -> UnboundedReceiver<JoinHandle<()>> {
+	let (tx, rx) = futures_channel::mpsc::unbounded::<JoinHandle<()>>();
+	TASKS.get_or_init(|| tx.clone());
+	TASKS_RUNNING.add_permits(jobs);
+	rx
+}
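
src/queue.rs pairs an unbounded channel of JoinHandles with two semaphores: TASKS_RUNNING caps how many jobs run at once, while REQUEST_TICKETS acts as a token bucket that set_download_rate refills once every 60/rate seconds (that is, rate requests per minute); get_request_ticket consumes a token permanently via forget(), so each permit pays for exactly one request. A minimal sketch of how a caller might drive the queue (illustrative only; the actual wiring in main is outside this excerpt):

// Illustrative usage of the queue API; not code from this commit.
use futures::StreamExt;

async fn drive_queue() {
	// Allow 8 jobs in flight and roughly 30 requests per minute.
	let mut handles = crate::queue::set_parallel_jobs(8);
	crate::queue::set_download_rate(30);

	crate::queue::spawn(async {
		// Hold a job slot for the duration of the task...
		let _permit = crate::queue::get_ticket().await;
		// ...and burn one rate-limit ticket before each HTTP request.
		crate::queue::get_request_ticket().await;
		// perform the request here
	});

	// Await every spawned task; more tasks may be queued while draining.
	while let Some(handle) = handles.next().await {
		let _ = handle.await;
	}
}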
21
src/util.rs
@@ -1,21 +1,38 @@
 // SPDX-License-Identifier: GPL-3.0-or-later
 
 use anyhow::Context;
+use bytes::Bytes;
+use futures::TryStreamExt;
 use tokio::fs::File as AsyncFile;
 use tokio::io::{AsyncRead, BufWriter};
+use tokio_util::io::StreamReader;
+
+use std::io;
 use std::path::Path;
 
 use crate::Result;
 
+pub async fn write_stream_to_file(
+	path: &Path,
+	stream: impl futures::Stream<Item = Result<Bytes, reqwest::Error>> + Unpin,
+) -> Result<()> {
+	let mut reader = StreamReader::new(stream.map_err(|x| io::Error::new(io::ErrorKind::Other, x)));
+	write_file_data(&path, &mut reader).await?;
+	Ok(())
+}
+
 /// Write all data to the specified path. Will overwrite previous file data.
 pub async fn write_file_data<R: ?Sized>(path: impl AsRef<Path>, data: &mut R) -> Result<()>
 where
 	R: AsyncRead + Unpin,
 {
-	let file = AsyncFile::create(path.as_ref()).await.context("failed to create file")?;
+	let file = AsyncFile::create(path.as_ref())
+		.await
+		.context("failed to create file")?;
 	let mut file = BufWriter::new(file);
-	tokio::io::copy(data, &mut file).await.context("failed to write to file")?;
+	tokio::io::copy(data, &mut file)
+		.await
+		.context("failed to write to file")?;
 	Ok(())
 }