mirror of
https://github.com/FliegendeWurst/KIT-ILIAS-downloader.git
synced 2024-08-28 04:04:18 +00:00
Handle disabled links + save main course page
+ misc. cleanups
This commit is contained in:
parent
5c17ec0326
commit
e94ce97896
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -16,7 +16,6 @@ dependencies = [
|
||||
"ignore",
|
||||
"indicatif",
|
||||
"keyring",
|
||||
"lazy_static",
|
||||
"once_cell",
|
||||
"parking_lot",
|
||||
"regex",
|
||||
|
@ -18,7 +18,6 @@ futures = "0.3.8"
|
||||
futures-util = "0.3.8"
|
||||
futures-channel = "0.3.8"
|
||||
regex = "1.3.7"
|
||||
lazy_static = "1.4.0"
|
||||
parking_lot = "0.11.0"
|
||||
structopt = "0.3.13"
|
||||
rpassword = "5.0.0"
|
||||
|
121
src/main.rs
121
src/main.rs
@ -7,7 +7,6 @@ use futures_channel::mpsc::UnboundedSender;
|
||||
use futures_util::{stream::TryStreamExt, StreamExt};
|
||||
use ignore::gitignore::Gitignore;
|
||||
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
|
||||
use lazy_static::lazy_static;
|
||||
use once_cell::sync::Lazy;
|
||||
use parking_lot::Mutex;
|
||||
use reqwest::{Client, Proxy};
|
||||
@ -35,6 +34,10 @@ static LOG_LEVEL: AtomicUsize = AtomicUsize::new(0);
|
||||
static PROGRESS_BAR_ENABLED: AtomicBool = AtomicBool::new(false);
|
||||
static PROGRESS_BAR: Lazy<ProgressBar> = Lazy::new(|| ProgressBar::new(0));
|
||||
|
||||
/// Global job queue
|
||||
static TASKS: Lazy<Mutex<Option<UnboundedSender<JoinHandle<()>>>>> = Lazy::new(Mutex::default);
|
||||
static TASKS_RUNNING: Lazy<Semaphore> = Lazy::new(|| Semaphore::new(0));
|
||||
|
||||
macro_rules! log {
|
||||
($lvl:expr, $($t:expr),+) => {
|
||||
#[allow(unused_comparisons)] // 0 <= 0
|
||||
@ -141,13 +144,19 @@ async fn main() {
|
||||
PROGRESS_BAR.set_message("initializing..");
|
||||
}
|
||||
if let Some(url) = ilias.opt.sync_url.as_ref() {
|
||||
for item in ilias.get_course_content(&URL::from_href(url).expect("invalid URL")).await.expect("invalid response") {
|
||||
let item = item.expect("invalid item");
|
||||
// TODO: this should be unified with the download logic below
|
||||
let course = ilias.get_course_content(&URL::from_href(url).expect("invalid URL")).await.expect("invalid response");
|
||||
if let Some(s) = course.1.as_ref() {
|
||||
let path = ilias.opt.output.join("course.html");
|
||||
write_file_data(&path, &mut s.as_bytes()).await.expect("failed to write course page html");
|
||||
}
|
||||
for item in course.0 {
|
||||
if let Ok(item) = item {
|
||||
let ilias = Arc::clone(&ilias);
|
||||
let mut path = ilias.opt.output.clone();
|
||||
path.push(file_escape(item.name()));
|
||||
let path = ilias.opt.output.join(file_escape(item.name()));
|
||||
tx.unbounded_send(task::spawn(process_gracefully(ilias, path, item))).unwrap();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let desktop = ilias.personal_desktop().await.context("Failed to load personal desktop");
|
||||
match desktop {
|
||||
@ -183,11 +192,6 @@ async fn main() {
|
||||
}
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref TASKS: Mutex<Option<UnboundedSender<JoinHandle<()>>>> = Mutex::default();
|
||||
static ref TASKS_RUNNING: Semaphore = Semaphore::new(0);
|
||||
}
|
||||
|
||||
macro_rules! spawn {
|
||||
($e:expr) => {
|
||||
TASKS.lock().as_ref().unwrap().unbounded_send(task::spawn($e)).unwrap();
|
||||
@ -264,34 +268,32 @@ async fn handle_gracefully(fut: impl Future<Output = Result<()>>) {
|
||||
|
||||
#[allow(non_upper_case_globals)]
|
||||
mod selectors {
|
||||
use lazy_static::lazy_static;
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use scraper::Selector;
|
||||
// construct CSS selectors once
|
||||
lazy_static! {
|
||||
pub static ref a: Selector = Selector::parse("a").unwrap();
|
||||
pub static ref a_target_blank: Selector = Selector::parse(r#"a[target="_blank"]"#).unwrap();
|
||||
pub static ref img: Selector = Selector::parse("img").unwrap();
|
||||
pub static ref table: Selector = Selector::parse("table").unwrap();
|
||||
pub static ref video_tr: Selector = Selector::parse(".ilTableOuter > div > table > tbody > tr").unwrap();
|
||||
pub static ref links_in_table: Selector = Selector::parse("tbody tr td a").unwrap();
|
||||
pub static ref th: Selector = Selector::parse("th").unwrap();
|
||||
pub static ref td: Selector = Selector::parse("td").unwrap();
|
||||
pub static ref tr: Selector = Selector::parse("tr").unwrap();
|
||||
pub static ref post_row: Selector = Selector::parse(".ilFrmPostRow").unwrap();
|
||||
pub static ref post_title: Selector = Selector::parse(".ilFrmPostTitle").unwrap();
|
||||
pub static ref post_container: Selector = Selector::parse(".ilFrmPostContentContainer").unwrap();
|
||||
pub static ref post_attachments: Selector = Selector::parse(".ilFrmPostAttachmentsContainer").unwrap();
|
||||
pub static ref span_small: Selector = Selector::parse("span.small").unwrap();
|
||||
pub static ref forum_pages: Selector = Selector::parse("div.ilTableNav > table > tbody > tr > td > a").unwrap();
|
||||
pub static ref alert_danger: Selector = Selector::parse("div.alert-danger").unwrap();
|
||||
pub static ref tree_highlighted: Selector = Selector::parse("span.ilHighlighted").unwrap();
|
||||
pub static ref form_group: Selector = Selector::parse(".form-group").unwrap();
|
||||
pub static ref form_name: Selector = Selector::parse(".il_InfoScreenProperty").unwrap();
|
||||
pub static ref cmd_node_regex: Regex = Regex::new(r#"cmdNode=uf:\w\w"#).unwrap();
|
||||
pub static ref image_src_regex: Regex = Regex::new(r#"\./data/produktiv/mobs/mm_(\d+)/([^?]+).+"#).unwrap();
|
||||
pub static ref XOCT_REGEX: Regex = Regex::new(r#"(?m)<script>\s+xoctPaellaPlayer\.init\(([\s\S]+)\)\s+</script>"#).unwrap();
|
||||
}
|
||||
pub static a: Lazy<Selector> = Lazy::new(|| Selector::parse("a").unwrap());
|
||||
pub static a_target_blank: Lazy<Selector> = Lazy::new(|| Selector::parse(r#"a[target="_blank"]"#).unwrap());
|
||||
pub static img: Lazy<Selector> = Lazy::new(|| Selector::parse("img").unwrap());
|
||||
pub static table: Lazy<Selector> = Lazy::new(|| Selector::parse("table").unwrap());
|
||||
pub static video_tr: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilTableOuter > div > table > tbody > tr").unwrap());
|
||||
pub static links_in_table: Lazy<Selector> = Lazy::new(|| Selector::parse("tbody tr td a").unwrap());
|
||||
pub static th: Lazy<Selector> = Lazy::new(|| Selector::parse("th").unwrap());
|
||||
pub static td: Lazy<Selector> = Lazy::new(|| Selector::parse("td").unwrap());
|
||||
pub static tr: Lazy<Selector> = Lazy::new(|| Selector::parse("tr").unwrap());
|
||||
pub static post_row: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilFrmPostRow").unwrap());
|
||||
pub static post_title: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilFrmPostTitle").unwrap());
|
||||
pub static post_container: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilFrmPostContentContainer").unwrap());
|
||||
pub static post_attachments: Lazy<Selector> = Lazy::new(|| Selector::parse(".ilFrmPostAttachmentsContainer").unwrap());
|
||||
pub static span_small: Lazy<Selector> = Lazy::new(|| Selector::parse("span.small").unwrap());
|
||||
pub static forum_pages: Lazy<Selector> = Lazy::new(|| Selector::parse("div.ilTableNav > table > tbody > tr > td > a").unwrap());
|
||||
pub static alert_danger: Lazy<Selector> = Lazy::new(|| Selector::parse("div.alert-danger").unwrap());
|
||||
pub static form_group: Lazy<Selector> = Lazy::new(|| Selector::parse(".form-group").unwrap());
|
||||
pub static form_name: Lazy<Selector> = Lazy::new(|| Selector::parse(".il_InfoScreenProperty").unwrap());
|
||||
pub static cmd_node_regex: Lazy<Regex> = Lazy::new(|| Regex::new(r#"cmdNode=uf:\w\w"#).unwrap());
|
||||
pub static image_src_regex: Lazy<Regex> = Lazy::new(|| Regex::new(r#"\./data/produktiv/mobs/mm_(\d+)/([^?]+).+"#).unwrap());
|
||||
pub static XOCT_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#"(?m)<script>\s+xoctPaellaPlayer\.init\(([\s\S]+)\)\s+</script>"#).unwrap());
|
||||
pub static il_content_container: Lazy<Selector> = Lazy::new(|| Selector::parse("#ilContentContainer").unwrap());
|
||||
}
|
||||
use crate::selectors::*;
|
||||
|
||||
@ -309,15 +311,17 @@ async fn process(ilias: Arc<ILIAS>, mut path: PathBuf, obj: Object) -> Result<()
|
||||
}
|
||||
log!(1, "Syncing {} {}", obj.kind(), relative_path.to_string_lossy());
|
||||
log!(2, " URL: {}", obj.url().url);
|
||||
if obj.is_dir() {
|
||||
create_dir(&path).await?;
|
||||
}
|
||||
match &obj {
|
||||
Course { url, name } => {
|
||||
create_dir(&path).await?;
|
||||
let content = if ilias.opt.content_tree {
|
||||
let html = ilias.download(&url.url).await?.text().await?;
|
||||
let cmd_node = cmd_node_regex.find(&html).context("can't find cmdNode")?.as_str()[8..].to_owned();
|
||||
let content_tree = ilias.get_course_content_tree(&url.ref_id, &cmd_node).await;
|
||||
match content_tree {
|
||||
Ok(tree) => tree,
|
||||
Ok(tree) => (tree.into_iter().map(Result::Ok).collect(), None),
|
||||
Err(e) => {
|
||||
// some folders are hidden on the course page and can only be found via the RSS feed / recent activity / content tree sidebar
|
||||
// TODO: this is probably never the case for folders?
|
||||
@ -325,30 +329,28 @@ async fn process(ilias: Arc<ILIAS>, mut path: PathBuf, obj: Object) -> Result<()
|
||||
return Ok(()); // ignore groups we are not in
|
||||
}
|
||||
warning!(name, "falling back to incomplete course content extractor!", e);
|
||||
ilias.get_course_content(&url).await?.into_iter().flat_map(Result::ok).collect() // TODO: perhaps don't download almost the same content 3x
|
||||
ilias.get_course_content(&url).await? // TODO: perhaps don't download almost the same content 3x
|
||||
},
|
||||
}
|
||||
} else {
|
||||
ilias.get_course_content(&url).await?.into_iter().flat_map(Result::ok).collect()
|
||||
ilias.get_course_content(&url).await?
|
||||
};
|
||||
for item in content {
|
||||
let mut path = path.clone();
|
||||
path.push(file_escape(item.name()));
|
||||
if let Some(s) = content.1.as_ref() {
|
||||
let path = ilias.opt.output.join("course.html");
|
||||
write_file_data(&path, &mut s.as_bytes()).await.expect("failed to write course page html");
|
||||
}
|
||||
for item in content.0 {
|
||||
let item = item?;
|
||||
let path = path.join(file_escape(item.name()));
|
||||
let ilias = Arc::clone(&ilias);
|
||||
spawn!(process_gracefully(ilias, path, item));
|
||||
}
|
||||
},
|
||||
Folder { url, .. } => {
|
||||
create_dir(&path).await?;
|
||||
let content = ilias.get_course_content(&url).await?;
|
||||
for item in content {
|
||||
if item.is_err() {
|
||||
log!(1, "Ignoring: {:?}", item.err().unwrap());
|
||||
continue;
|
||||
}
|
||||
let item = item.unwrap();
|
||||
let mut path = path.clone();
|
||||
path.push(file_escape(item.name()));
|
||||
for item in content.0 {
|
||||
let item = item?;
|
||||
let path = path.join(file_escape(item.name()));
|
||||
let ilias = Arc::clone(&ilias);
|
||||
spawn!(process_gracefully(ilias, path, item));
|
||||
}
|
||||
@ -372,7 +374,6 @@ async fn process(ilias: Arc<ILIAS>, mut path: PathBuf, obj: Object) -> Result<()
|
||||
if ilias.opt.no_videos {
|
||||
return Ok(());
|
||||
}
|
||||
create_dir(&path).await?;
|
||||
let full_url = {
|
||||
// first find the link to full video list
|
||||
let list_url = format!("{}ilias.php?ref_id={}&cmdClass=xocteventgui&cmdNode=nc:n4:14u&baseClass=ilObjPluginDispatchGUI&lang=de&limit=20&cmd=asyncGetTableGUI&cmdMode=asynch", ILIAS_URL, url.ref_id);
|
||||
@ -490,7 +491,6 @@ async fn process(ilias: Arc<ILIAS>, mut path: PathBuf, obj: Object) -> Result<()
|
||||
if !ilias.opt.forum {
|
||||
return Ok(());
|
||||
}
|
||||
create_dir(&path).await?;
|
||||
let url = &url.url;
|
||||
let html = {
|
||||
let data = ilias.download(url);
|
||||
@ -581,7 +581,6 @@ async fn process(ilias: Arc<ILIAS>, mut path: PathBuf, obj: Object) -> Result<()
|
||||
if !ilias.opt.forum {
|
||||
return Ok(());
|
||||
}
|
||||
create_dir(&path).await?;
|
||||
let mut all_images = Vec::new();
|
||||
let mut attachments = Vec::new();
|
||||
{
|
||||
@ -698,7 +697,6 @@ async fn process(ilias: Arc<ILIAS>, mut path: PathBuf, obj: Object) -> Result<()
|
||||
}
|
||||
},
|
||||
ExerciseHandler { url, .. } => {
|
||||
create_dir(&path).await?;
|
||||
let html = ilias.get_html(&url.url).await?;
|
||||
let mut filenames = HashSet::new();
|
||||
for row in html.select(&form_group) {
|
||||
@ -986,18 +984,20 @@ impl ILIAS {
|
||||
let container_items = Selector::parse("div.il_ContainerListItem").unwrap();
|
||||
let container_item_title = Selector::parse("a.il_ContainerItemTitle").unwrap();
|
||||
html.select(&container_items)
|
||||
.map(|item| {
|
||||
.flat_map(|item| {
|
||||
item.select(&container_item_title)
|
||||
.next()
|
||||
.map(|link| Object::from_link(item, link))
|
||||
.context("can't find link").flatten2()
|
||||
// items without links are ignored
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
async fn get_course_content(&self, url: &URL) -> Result<Vec<Result<Object>>> {
|
||||
/// Returns subfolders and the main text on the course page.
|
||||
async fn get_course_content(&self, url: &URL) -> Result<(Vec<Result<Object>>, Option<String>)> {
|
||||
let html = self.get_html(&url.url).await?;
|
||||
Ok(ILIAS::get_items(&html))
|
||||
let main_text = html.select(&il_content_container).next().map(|x| x.inner_html());
|
||||
Ok((ILIAS::get_items(&html), main_text))
|
||||
}
|
||||
|
||||
async fn personal_desktop(&self) -> Result<Dashboard> {
|
||||
@ -1114,9 +1114,8 @@ impl Object {
|
||||
| Thread { .. }
|
||||
| Wiki { .. }
|
||||
| ExerciseHandler { .. }
|
||||
| Presentation { .. }
|
||||
| PluginDispatch { .. } => true,
|
||||
File { .. } | Video { .. } | Weblink { .. } | Survey { .. } | Generic { .. } => false,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user