mirror of
https://github.com/FliegendeWurst/KIT-ILIAS-downloader.git
synced 2024-08-28 04:04:18 +00:00
Paginate forum thread posts and warn about ignored forum pages
This commit is contained in:
parent
74ea9bb71b
commit
f7abcda316
51
src/main.rs
51
src/main.rs
@@ -1,4 +1,4 @@
|
|||||||
use futures_util::stream::TryStreamExt;
|
use futures_util::stream::{StreamExt, TryStreamExt};
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
use parking_lot::Mutex;
|
use parking_lot::Mutex;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
@@ -550,7 +550,7 @@ fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> impl std::future::F
|
|||||||
let html = Html::parse_document(&html_text);
|
let html = Html::parse_document(&html_text);
|
||||||
//https://ilias.studium.kit.edu/ilias.php?ref_id=122&cmdClass=ilobjforumgui&frm_tt_e39_122_trows=800&cmd=showThreads&cmdNode=uf:lg&baseClass=ilrepositorygui
|
//https://ilias.studium.kit.edu/ilias.php?ref_id=122&cmdClass=ilobjforumgui&frm_tt_e39_122_trows=800&cmd=showThreads&cmdNode=uf:lg&baseClass=ilrepositorygui
|
||||||
let url = {
|
let url = {
|
||||||
let t800 = html.select(&a).filter(|x| x.value().attr("href").unwrap_or("").contains("trows=800")).next().expect("can't find forum thread count selector");
|
let t800 = html.select(&a).filter(|x| x.value().attr("href").unwrap_or("").contains("trows=800")).next().unwrap_or_else(|| panic!("can't find forum thread count selector in {:?}", path));
|
||||||
t800.value().attr("href").unwrap()
|
t800.value().attr("href").unwrap()
|
||||||
};
|
};
|
||||||
format!("{}{}", ILIAS_URL, url)
|
format!("{}{}", ILIAS_URL, url)
|
||||||
@@ -572,6 +572,18 @@ fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> impl std::future::F
|
|||||||
let mut path = path.clone();
|
let mut path = path.clone();
|
||||||
let name = format!("{}_{}", object.url().thr_pk.as_ref().expect("thr_pk not found for thread"), link.text().collect::<String>().replace('/', "-").trim());
|
let name = format!("{}_{}", object.url().thr_pk.as_ref().expect("thr_pk not found for thread"), link.text().collect::<String>().replace('/', "-").trim());
|
||||||
path.push(name);
|
path.push(name);
|
||||||
|
// TODO: set modification date?
|
||||||
|
let saved_posts = {
|
||||||
|
match fs::read_dir(&path) {
|
||||||
|
Ok(stream) => stream.count(),
|
||||||
|
Err(_) => 0
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let available_posts = cells[3].text().next().unwrap().trim().parse::<usize>().unwrap();
|
||||||
|
if available_posts <= saved_posts && !ilias.opt.force {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
println!("New posts in {:?}..", path);
|
||||||
let ilias = Arc::clone(&ilias);
|
let ilias = Arc::clone(&ilias);
|
||||||
task::spawn(async {
|
task::spawn(async {
|
||||||
*TASKS_QUEUED.lock() += 1;
|
*TASKS_QUEUED.lock() += 1;
|
||||||
@@ -584,6 +596,10 @@ fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> impl std::future::F
|
|||||||
*TASKS_QUEUED.lock() -= 1;
|
*TASKS_QUEUED.lock() -= 1;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
let pages = Selector::parse("div.ilTableNav > table > tbody > tr > td > a").unwrap();
|
||||||
|
if html.select(&pages).count() > 0 {
|
||||||
|
println!("Ignoring older threads (801st+) in {:?}..", path);
|
||||||
|
}
|
||||||
},
|
},
|
||||||
Thread { url } => {
|
Thread { url } => {
|
||||||
if !ilias.opt.forum {
|
if !ilias.opt.forum {
|
||||||
@@ -592,10 +608,6 @@ fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> impl std::future::F
|
|||||||
if let Err(e) = fs::create_dir(&path) {
|
if let Err(e) = fs::create_dir(&path) {
|
||||||
if e.kind() != io::ErrorKind::AlreadyExists {
|
if e.kind() != io::ErrorKind::AlreadyExists {
|
||||||
println!("error: {:?}", e);
|
println!("error: {:?}", e);
|
||||||
}
|
|
||||||
// skip already downloaded
|
|
||||||
// TODO: compare modification date
|
|
||||||
if !ilias.opt.force {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -638,6 +650,33 @@ fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> impl std::future::F
|
|||||||
*TASKS_QUEUED.lock() -= 1;
|
*TASKS_QUEUED.lock() -= 1;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
// pagination
|
||||||
|
let table = Selector::parse("table").unwrap();
|
||||||
|
if let Some(pages) = html.select(&table).next() {
|
||||||
|
let links_in_table = Selector::parse("tbody tr td a").unwrap();
|
||||||
|
if let Some(last) = pages.select(&links_in_table).last() {
|
||||||
|
let text = last.text().collect::<String>();
|
||||||
|
if text.trim() == ">>" {
|
||||||
|
// not last page yet
|
||||||
|
let ilias = Arc::clone(&ilias);
|
||||||
|
let next_page = Thread {
|
||||||
|
url: URL::from_href(last.value().attr("href").unwrap())
|
||||||
|
};
|
||||||
|
task::spawn(async move {
|
||||||
|
*TASKS_QUEUED.lock() += 1;
|
||||||
|
while *TASKS_RUNNING.lock() >= ilias.opt.jobs {
|
||||||
|
tokio::time::delay_for(Duration::from_millis(100)).await;
|
||||||
|
}
|
||||||
|
*TASKS_RUNNING.lock() += 1;
|
||||||
|
process(ilias, path, next_page).await;
|
||||||
|
*TASKS_RUNNING.lock() -= 1;
|
||||||
|
*TASKS_QUEUED.lock() -= 1;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
println!("error: unable to find pagination links");
|
||||||
|
}
|
||||||
|
}
|
||||||
},
|
},
|
||||||
o => {
|
o => {
|
||||||
if ilias.opt.verbose > 0 {
|
if ilias.opt.verbose > 0 {
|
||||||
|
Loading…
Reference in New Issue
Block a user