mirror of
https://github.com/FliegendeWurst/KIT-ILIAS-downloader.git
synced 2024-08-28 04:04:18 +00:00
Handle disabled links + save main course page
+ misc. cleanups
This commit is contained in:
parent
5c17ec0326
commit
e94ce97896
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -16,7 +16,6 @@ dependencies = [
|
|||||||
"ignore",
|
"ignore",
|
||||||
"indicatif",
|
"indicatif",
|
||||||
"keyring",
|
"keyring",
|
||||||
"lazy_static",
|
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"parking_lot",
|
"parking_lot",
|
||||||
"regex",
|
"regex",
|
||||||
|
@ -18,7 +18,6 @@ futures = "0.3.8"
|
|||||||
futures-util = "0.3.8"
|
futures-util = "0.3.8"
|
||||||
futures-channel = "0.3.8"
|
futures-channel = "0.3.8"
|
||||||
regex = "1.3.7"
|
regex = "1.3.7"
|
||||||
lazy_static = "1.4.0"
|
|
||||||
parking_lot = "0.11.0"
|
parking_lot = "0.11.0"
|
||||||
structopt = "0.3.13"
|
structopt = "0.3.13"
|
||||||
rpassword = "5.0.0"
|
rpassword = "5.0.0"
|
||||||
|
121
src/main.rs
121
src/main.rs
@ -7,7 +7,6 @@ use futures_channel::mpsc::UnboundedSender;
|
|||||||
use futures_util::{stream::TryStreamExt, StreamExt};
|
use futures_util::{stream::TryStreamExt, StreamExt};
|
||||||
use ignore::gitignore::Gitignore;
|
use ignore::gitignore::Gitignore;
|
||||||
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
|
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
|
||||||
use lazy_static::lazy_static;
|
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use parking_lot::Mutex;
|
use parking_lot::Mutex;
|
||||||
use reqwest::{Client, Proxy};
|
use reqwest::{Client, Proxy};
|
||||||
@ -35,6 +34,10 @@ static LOG_LEVEL: AtomicUsize = AtomicUsize::new(0);
|
|||||||
static PROGRESS_BAR_ENABLED: AtomicBool = AtomicBool::new(false);
|
static PROGRESS_BAR_ENABLED: AtomicBool = AtomicBool::new(false);
|
||||||
static PROGRESS_BAR: Lazy<ProgressBar> = Lazy::new(|| ProgressBar::new(0));
|
static PROGRESS_BAR: Lazy<ProgressBar> = Lazy::new(|| ProgressBar::new(0));
|
||||||
|
|
||||||
|
/// Global job queue
|
||||||
|
static TASKS: Lazy<Mutex<Option<UnboundedSender<JoinHandle<()>>>>> = Lazy::new(Mutex::default);
|
||||||
|
static TASKS_RUNNING: Lazy<Semaphore> = Lazy::new(|| Semaphore::new(0));
|
||||||
|
|
||||||
macro_rules! log {
|
macro_rules! log {
|
||||||
($lvl:expr, $($t:expr),+) => {
|
($lvl:expr, $($t:expr),+) => {
|
||||||
#[allow(unused_comparisons)] // 0 <= 0
|
#[allow(unused_comparisons)] // 0 <= 0
|
||||||
@ -141,13 +144,19 @@ async fn main() {
|
|||||||
PROGRESS_BAR.set_message("initializing..");
|
PROGRESS_BAR.set_message("initializing..");
|
||||||
}
|
}
|
||||||
if let Some(url) = ilias.opt.sync_url.as_ref() {
|
if let Some(url) = ilias.opt.sync_url.as_ref() {
|
||||||
for item in ilias.get_course_content(&URL::from_href(url).expect("invalid URL")).await.expect("invalid response") {
|
// TODO: this should be unified with the download logic below
|
||||||
let item = item.expect("invalid item");
|
let course = ilias.get_course_content(&URL::from_href(url).expect("invalid URL")).await.expect("invalid response");
|
||||||
|
if let Some(s) = course.1.as_ref() {
|
||||||
|
let path = ilias.opt.output.join("course.html");
|
||||||
|
write_file_data(&path, &mut s.as_bytes()).await.expect("failed to write course page html");
|
||||||
|
}
|
||||||
|
for item in course.0 {
|
||||||
|
if let Ok(item) = item {
|
||||||
let ilias = Arc::clone(&ilias);
|
let ilias = Arc::clone(&ilias);
|
||||||
let mut path = ilias.opt.output.clone();
|
let path = ilias.opt.output.join(file_escape(item.name()));
|
||||||
path.push(file_escape(item.name()));
|
|
||||||
tx.unbounded_send(task::spawn(process_gracefully(ilias, path, item))).unwrap();
|
tx.unbounded_send(task::spawn(process_gracefully(ilias, path, item))).unwrap();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
let desktop = ilias.personal_desktop().await.context("Failed to load personal desktop");
|
let desktop = ilias.personal_desktop().await.context("Failed to load personal desktop");
|
||||||
match desktop {
|
match desktop {
|
||||||
@ -183,11 +192,6 @@ async fn main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
lazy_static! {
|
|
||||||
static ref TASKS: Mutex<Option<UnboundedSender<JoinHandle<()>>>> = Mutex::default();
|
|
||||||
static ref TASKS_RUNNING: Semaphore = Semaphore::new(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
macro_rules! spawn {
|
macro_rules! spawn {
|
||||||
($e:expr) => {
|
($e:expr) => {
|
||||||
TASKS.lock().as_ref().unwrap().unbounded_send(task::spawn($e)).unwrap();
|
TASKS.lock().as_ref().unwrap().unbounded_send(task::spawn($e)).unwrap();
|
||||||
@ -264,34 +268,32 @@ async fn handle_gracefully(fut: impl Future<Output = Result<()>>) {
|
|||||||
|
|
||||||
#[allow(non_upper_case_globals)]
|
#[allow(non_upper_case_globals)]
|
||||||
mod selectors {
|
mod selectors {
|
||||||
use lazy_static::lazy_static;
|
use once_cell::sync::Lazy;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use scraper::Selector;
|
use scraper::Selector;
|
||||||
// construct CSS selectors once
|
// construct CSS selectors once
|
||||||
lazy_static! {
|
pub static a: Lazy<Selector> = Lazy::new(|| Selector::parse("a").unwrap());
|
||||||
pub static ref a: Selector = Selector::parse("a").unwrap();
|
pub static a_target_blank: Lazy<Selector> = Lazy::new(|| Selector::parse(r#"a[target="_blank"]"#).unwrap());
|
||||||
pub static ref a_target_blank: Selector = Selector::parse(r#"a[target="_blank"]"#).unwrap();
|
pub static img: Lazy<Selector> = Lazy::new(|| Selector::parse("img").unwrap());
|
||||||
pub static ref img: Selector = Selector::parse("img").unwrap();
|
pub static table: Lazy<Selector> = Lazy::new(|| Selector::parse("table").unwrap());
|
||||||
pub static ref table: Selector = Selector::parse("table").unwrap();
|
pub static video_tr: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilTableOuter > div > table > tbody > tr").unwrap());
|
||||||
pub static ref video_tr: Selector = Selector::parse(".ilTableOuter > div > table > tbody > tr").unwrap();
|
pub static links_in_table: Lazy<Selector> = Lazy::new(|| Selector::parse("tbody tr td a").unwrap());
|
||||||
pub static ref links_in_table: Selector = Selector::parse("tbody tr td a").unwrap();
|
pub static th: Lazy<Selector> = Lazy::new(|| Selector::parse("th").unwrap());
|
||||||
pub static ref th: Selector = Selector::parse("th").unwrap();
|
pub static td: Lazy<Selector> = Lazy::new(|| Selector::parse("td").unwrap());
|
||||||
pub static ref td: Selector = Selector::parse("td").unwrap();
|
pub static tr: Lazy<Selector> = Lazy::new(|| Selector::parse("tr").unwrap());
|
||||||
pub static ref tr: Selector = Selector::parse("tr").unwrap();
|
pub static post_row: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilFrmPostRow").unwrap());
|
||||||
pub static ref post_row: Selector = Selector::parse(".ilFrmPostRow").unwrap();
|
pub static post_title: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilFrmPostTitle").unwrap());
|
||||||
pub static ref post_title: Selector = Selector::parse(".ilFrmPostTitle").unwrap();
|
pub static post_container: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilFrmPostContentContainer").unwrap());
|
||||||
pub static ref post_container: Selector = Selector::parse(".ilFrmPostContentContainer").unwrap();
|
pub static post_attachments: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilFrmPostAttachmentsContainer").unwrap());
|
||||||
pub static ref post_attachments: Selector = Selector::parse(".ilFrmPostAttachmentsContainer").unwrap();
|
pub static span_small: Lazy<Selector> = Lazy::new(|| Selector::parse("span.small").unwrap());
|
||||||
pub static ref span_small: Selector = Selector::parse("span.small").unwrap();
|
pub static forum_pages: Lazy<Selector> = Lazy::new(|| Selector::parse("div.ilTableNav > table > tbody > tr > td > a").unwrap());
|
||||||
pub static ref forum_pages: Selector = Selector::parse("div.ilTableNav > table > tbody > tr > td > a").unwrap();
|
pub static alert_danger: Lazy<Selector> = Lazy::new(|| Selector::parse("div.alert-danger").unwrap());
|
||||||
pub static ref alert_danger: Selector = Selector::parse("div.alert-danger").unwrap();
|
pub static form_group: Lazy<Selector> = Lazy::new(|| Selector::parse(".form-group").unwrap());
|
||||||
pub static ref tree_highlighted: Selector = Selector::parse("span.ilHighlighted").unwrap();
|
pub static form_name: Lazy<Selector> = Lazy::new(|| Selector::parse(".il_InfoScreenProperty").unwrap());
|
||||||
pub static ref form_group: Selector = Selector::parse(".form-group").unwrap();
|
pub static cmd_node_regex: Lazy<Regex> = Lazy::new(|| Regex::new(r#"cmdNode=uf:\w\w"#).unwrap());
|
||||||
pub static ref form_name: Selector = Selector::parse(".il_InfoScreenProperty").unwrap();
|
pub static image_src_regex: Lazy<Regex> = Lazy::new(|| Regex::new(r#"\./data/produktiv/mobs/mm_(\d+)/([^?]+).+"#).unwrap());
|
||||||
pub static ref cmd_node_regex: Regex = Regex::new(r#"cmdNode=uf:\w\w"#).unwrap();
|
pub static XOCT_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#"(?m)<script>\s+xoctPaellaPlayer\.init\(([\s\S]+)\)\s+</script>"#).unwrap());
|
||||||
pub static ref image_src_regex: Regex = Regex::new(r#"\./data/produktiv/mobs/mm_(\d+)/([^?]+).+"#).unwrap();
|
pub static il_content_container: Lazy<Selector> = Lazy::new(|| Selector::parse("#ilContentContainer").unwrap());
|
||||||
pub static ref XOCT_REGEX: Regex = Regex::new(r#"(?m)<script>\s+xoctPaellaPlayer\.init\(([\s\S]+)\)\s+</script>"#).unwrap();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
use crate::selectors::*;
|
use crate::selectors::*;
|
||||||
|
|
||||||
@ -309,15 +311,17 @@ async fn process(ilias: Arc<ILIAS>, mut path: PathBuf, obj: Object) -> Result<()
|
|||||||
}
|
}
|
||||||
log!(1, "Syncing {} {}", obj.kind(), relative_path.to_string_lossy());
|
log!(1, "Syncing {} {}", obj.kind(), relative_path.to_string_lossy());
|
||||||
log!(2, " URL: {}", obj.url().url);
|
log!(2, " URL: {}", obj.url().url);
|
||||||
|
if obj.is_dir() {
|
||||||
|
create_dir(&path).await?;
|
||||||
|
}
|
||||||
match &obj {
|
match &obj {
|
||||||
Course { url, name } => {
|
Course { url, name } => {
|
||||||
create_dir(&path).await?;
|
|
||||||
let content = if ilias.opt.content_tree {
|
let content = if ilias.opt.content_tree {
|
||||||
let html = ilias.download(&url.url).await?.text().await?;
|
let html = ilias.download(&url.url).await?.text().await?;
|
||||||
let cmd_node = cmd_node_regex.find(&html).context("can't find cmdNode")?.as_str()[8..].to_owned();
|
let cmd_node = cmd_node_regex.find(&html).context("can't find cmdNode")?.as_str()[8..].to_owned();
|
||||||
let content_tree = ilias.get_course_content_tree(&url.ref_id, &cmd_node).await;
|
let content_tree = ilias.get_course_content_tree(&url.ref_id, &cmd_node).await;
|
||||||
match content_tree {
|
match content_tree {
|
||||||
Ok(tree) => tree,
|
Ok(tree) => (tree.into_iter().map(Result::Ok).collect(), None),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
// some folders are hidden on the course page and can only be found via the RSS feed / recent activity / content tree sidebar
|
// some folders are hidden on the course page and can only be found via the RSS feed / recent activity / content tree sidebar
|
||||||
// TODO: this is probably never the case for folders?
|
// TODO: this is probably never the case for folders?
|
||||||
@ -325,30 +329,28 @@ async fn process(ilias: Arc<ILIAS>, mut path: PathBuf, obj: Object) -> Result<()
|
|||||||
return Ok(()); // ignore groups we are not in
|
return Ok(()); // ignore groups we are not in
|
||||||
}
|
}
|
||||||
warning!(name, "falling back to incomplete course content extractor!", e);
|
warning!(name, "falling back to incomplete course content extractor!", e);
|
||||||
ilias.get_course_content(&url).await?.into_iter().flat_map(Result::ok).collect() // TODO: perhaps don't download almost the same content 3x
|
ilias.get_course_content(&url).await? // TODO: perhaps don't download almost the same content 3x
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
ilias.get_course_content(&url).await?.into_iter().flat_map(Result::ok).collect()
|
ilias.get_course_content(&url).await?
|
||||||
};
|
};
|
||||||
for item in content {
|
if let Some(s) = content.1.as_ref() {
|
||||||
let mut path = path.clone();
|
let path = ilias.opt.output.join("course.html");
|
||||||
path.push(file_escape(item.name()));
|
write_file_data(&path, &mut s.as_bytes()).await.expect("failed to write course page html");
|
||||||
|
}
|
||||||
|
for item in content.0 {
|
||||||
|
let item = item?;
|
||||||
|
let path = path.join(file_escape(item.name()));
|
||||||
let ilias = Arc::clone(&ilias);
|
let ilias = Arc::clone(&ilias);
|
||||||
spawn!(process_gracefully(ilias, path, item));
|
spawn!(process_gracefully(ilias, path, item));
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
Folder { url, .. } => {
|
Folder { url, .. } => {
|
||||||
create_dir(&path).await?;
|
|
||||||
let content = ilias.get_course_content(&url).await?;
|
let content = ilias.get_course_content(&url).await?;
|
||||||
for item in content {
|
for item in content.0 {
|
||||||
if item.is_err() {
|
let item = item?;
|
||||||
log!(1, "Ignoring: {:?}", item.err().unwrap());
|
let path = path.join(file_escape(item.name()));
|
||||||
continue;
|
|
||||||
}
|
|
||||||
let item = item.unwrap();
|
|
||||||
let mut path = path.clone();
|
|
||||||
path.push(file_escape(item.name()));
|
|
||||||
let ilias = Arc::clone(&ilias);
|
let ilias = Arc::clone(&ilias);
|
||||||
spawn!(process_gracefully(ilias, path, item));
|
spawn!(process_gracefully(ilias, path, item));
|
||||||
}
|
}
|
||||||
@ -372,7 +374,6 @@ async fn process(ilias: Arc<ILIAS>, mut path: PathBuf, obj: Object) -> Result<()
|
|||||||
if ilias.opt.no_videos {
|
if ilias.opt.no_videos {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
create_dir(&path).await?;
|
|
||||||
let full_url = {
|
let full_url = {
|
||||||
// first find the link to full video list
|
// first find the link to full video list
|
||||||
let list_url = format!("{}ilias.php?ref_id={}&cmdClass=xocteventgui&cmdNode=nc:n4:14u&baseClass=ilObjPluginDispatchGUI&lang=de&limit=20&cmd=asyncGetTableGUI&cmdMode=asynch", ILIAS_URL, url.ref_id);
|
let list_url = format!("{}ilias.php?ref_id={}&cmdClass=xocteventgui&cmdNode=nc:n4:14u&baseClass=ilObjPluginDispatchGUI&lang=de&limit=20&cmd=asyncGetTableGUI&cmdMode=asynch", ILIAS_URL, url.ref_id);
|
||||||
@ -490,7 +491,6 @@ async fn process(ilias: Arc<ILIAS>, mut path: PathBuf, obj: Object) -> Result<()
|
|||||||
if !ilias.opt.forum {
|
if !ilias.opt.forum {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
create_dir(&path).await?;
|
|
||||||
let url = &url.url;
|
let url = &url.url;
|
||||||
let html = {
|
let html = {
|
||||||
let data = ilias.download(url);
|
let data = ilias.download(url);
|
||||||
@ -581,7 +581,6 @@ async fn process(ilias: Arc<ILIAS>, mut path: PathBuf, obj: Object) -> Result<()
|
|||||||
if !ilias.opt.forum {
|
if !ilias.opt.forum {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
create_dir(&path).await?;
|
|
||||||
let mut all_images = Vec::new();
|
let mut all_images = Vec::new();
|
||||||
let mut attachments = Vec::new();
|
let mut attachments = Vec::new();
|
||||||
{
|
{
|
||||||
@ -698,7 +697,6 @@ async fn process(ilias: Arc<ILIAS>, mut path: PathBuf, obj: Object) -> Result<()
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
ExerciseHandler { url, .. } => {
|
ExerciseHandler { url, .. } => {
|
||||||
create_dir(&path).await?;
|
|
||||||
let html = ilias.get_html(&url.url).await?;
|
let html = ilias.get_html(&url.url).await?;
|
||||||
let mut filenames = HashSet::new();
|
let mut filenames = HashSet::new();
|
||||||
for row in html.select(&form_group) {
|
for row in html.select(&form_group) {
|
||||||
@ -986,18 +984,20 @@ impl ILIAS {
|
|||||||
let container_items = Selector::parse("div.il_ContainerListItem").unwrap();
|
let container_items = Selector::parse("div.il_ContainerListItem").unwrap();
|
||||||
let container_item_title = Selector::parse("a.il_ContainerItemTitle").unwrap();
|
let container_item_title = Selector::parse("a.il_ContainerItemTitle").unwrap();
|
||||||
html.select(&container_items)
|
html.select(&container_items)
|
||||||
.map(|item| {
|
.flat_map(|item| {
|
||||||
item.select(&container_item_title)
|
item.select(&container_item_title)
|
||||||
.next()
|
.next()
|
||||||
.map(|link| Object::from_link(item, link))
|
.map(|link| Object::from_link(item, link))
|
||||||
.context("can't find link").flatten2()
|
// items without links are ignored
|
||||||
})
|
})
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_course_content(&self, url: &URL) -> Result<Vec<Result<Object>>> {
|
/// Returns subfolders and the main text on the course page.
|
||||||
|
async fn get_course_content(&self, url: &URL) -> Result<(Vec<Result<Object>>, Option<String>)> {
|
||||||
let html = self.get_html(&url.url).await?;
|
let html = self.get_html(&url.url).await?;
|
||||||
Ok(ILIAS::get_items(&html))
|
let main_text = html.select(&il_content_container).next().map(|x| x.inner_html());
|
||||||
|
Ok((ILIAS::get_items(&html), main_text))
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn personal_desktop(&self) -> Result<Dashboard> {
|
async fn personal_desktop(&self) -> Result<Dashboard> {
|
||||||
@ -1114,9 +1114,8 @@ impl Object {
|
|||||||
| Thread { .. }
|
| Thread { .. }
|
||||||
| Wiki { .. }
|
| Wiki { .. }
|
||||||
| ExerciseHandler { .. }
|
| ExerciseHandler { .. }
|
||||||
| Presentation { .. }
|
|
||||||
| PluginDispatch { .. } => true,
|
| PluginDispatch { .. } => true,
|
||||||
File { .. } | Video { .. } | Weblink { .. } | Survey { .. } | Generic { .. } => false,
|
_ => false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user