ripgrep-all/src/preproc_cache.rs

118 lines
4.1 KiB
Rust
Raw Normal View History

2020-06-08 21:11:43 +00:00
use crate::project_dirs;
use anyhow::{format_err, Context, Result};
2019-06-07 21:17:33 +00:00
use log::*;
use std::{
fmt::Display,
sync::{Arc, RwLock},
};
2019-06-07 17:00:24 +00:00
pub fn open() -> Result<Arc<RwLock<dyn PreprocCache>>> {
2019-06-07 18:12:24 +00:00
Ok(Arc::new(RwLock::new(LmdbCache::open()?)))
2019-06-07 17:00:24 +00:00
}
/// A key/value cache for preprocessor output.
pub trait PreprocCache {
    /// Looks up `key` in the store named `db_name`.
    ///
    /// * `runner` produces the value when it is needed; it may return
    ///   `Ok(None)` to signal that there is nothing to store.
    /// * `callback` receives the cached bytes.
    ///
    /// Both closures are boxed `FnOnce`s, so each is invoked at most once.
    ///
    /// Open question from the original author: possible without second lambda?
    fn get_or_run<'a>(
        &mut self,
        db_name: &str,
        key: &[u8],
        runner: Box<dyn FnOnce() -> Result<Option<Vec<u8>>> + 'a>,
        callback: Box<dyn FnOnce(&[u8]) -> Result<()> + 'a>,
    ) -> Result<()>;
}
/// opens a LMDB cache
fn open_cache_db() -> Result<std::sync::Arc<std::sync::RwLock<rkv::Rkv>>> {
2020-06-08 21:11:43 +00:00
let pd = project_dirs()?;
let app_cache = pd.cache_dir();
std::fs::create_dir_all(app_cache)?;
2019-06-07 17:00:24 +00:00
2020-06-06 11:06:19 +00:00
rkv::Manager::singleton()
2019-06-07 17:00:24 +00:00
.write()
2020-06-06 11:06:19 +00:00
.map_err(|_| format_err!("could not write cache db manager"))?
2020-06-08 21:11:43 +00:00
.get_or_create(app_cache, |p| {
2019-06-07 17:00:24 +00:00
let mut builder = rkv::Rkv::environment_builder();
builder
.set_flags(rkv::EnvironmentFlags::NO_SYNC | rkv::EnvironmentFlags::WRITE_MAP) // not durable cuz it's a cache
// i'm not sure why NO_TLS is needed. otherwise LMDB transactions (open readers) will keep piling up until it fails with
2019-06-14 14:20:48 +00:00
// LmdbError(ReadersFull). Those "open readers" stay even after the corresponding processes exit.
// hope setting this doesn't break integrity
2019-06-07 17:00:24 +00:00
.set_flags(rkv::EnvironmentFlags::NO_TLS)
2019-06-14 14:20:48 +00:00
// sometimes, this seems to cause the data.mdb file to appear as 2GB in size (with holes), but sometimes not?
2019-06-07 17:00:24 +00:00
.set_map_size(2 * 1024 * 1024 * 1024)
.set_max_dbs(100)
.set_max_readers(128);
rkv::Rkv::from_env(p, builder)
})
2020-06-06 11:06:19 +00:00
.map_err(|e| format_err!("could not get/create cache db: {}", e))
2019-06-07 17:00:24 +00:00
}
pub struct LmdbCache {
db_arc: std::sync::Arc<std::sync::RwLock<rkv::Rkv>>,
}
impl LmdbCache {
pub fn open() -> Result<LmdbCache> {
2019-06-07 17:00:24 +00:00
Ok(LmdbCache {
db_arc: open_cache_db()?,
})
}
}
/// Newtype around `rkv::StoreError` that implements `std::error::Error`, so the
/// error can be given context and propagated with `?`.
#[derive(Debug)]
struct RkvErrWrap(rkv::StoreError);

impl Display for RkvErrWrap {
    /// Delegates to the wrapped error's own `Display` output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let RkvErrWrap(inner) = self;
        Display::fmt(inner, f)
    }
}

impl std::error::Error for RkvErrWrap {}
2019-06-07 17:00:24 +00:00
impl PreprocCache for LmdbCache {
// possible without second lambda?
fn get_or_run<'a>(
&mut self,
db_name: &str,
key: &[u8],
runner: Box<dyn FnOnce() -> Result<Option<Vec<u8>>> + 'a>,
callback: Box<dyn FnOnce(&[u8]) -> Result<()> + 'a>,
) -> Result<()> {
2019-06-07 17:00:24 +00:00
let db_env = self.db_arc.read().unwrap();
let db = db_env
.open_single(db_name, rkv::store::Options::create())
.map_err(RkvErrWrap)
.with_context(|| format_err!("could not open cache db store"))?;
2019-06-07 17:00:24 +00:00
let reader = db_env.read().expect("could not get reader");
let cached = db
.get(&reader, &key)
.map_err(RkvErrWrap)
.with_context(|| format_err!("could not read from db"))?;
2019-06-07 17:00:24 +00:00
match cached {
Some(rkv::Value::Blob(cached)) => {
2019-06-07 21:17:33 +00:00
debug!("got cached");
2019-06-07 17:00:24 +00:00
callback(cached)?;
}
Some(_) => Err(format_err!("Integrity: value not blob"))?,
None => {
2019-06-07 21:17:33 +00:00
debug!("did not get cached");
2019-06-07 17:00:24 +00:00
drop(reader);
if let Some(got) = runner()? {
let mut writer = db_env
.write()
.map_err(RkvErrWrap)
.with_context(|| format_err!("could not open write handle to cache"))?;
2019-06-07 17:00:24 +00:00
db.put(&mut writer, &key, &rkv::Value::Blob(&got))
.map_err(RkvErrWrap)
.with_context(|| format_err!("could not write to cache"))?;
writer
.commit()
.map_err(RkvErrWrap)
.with_context(|| format!("could not write cache"))?;
2019-06-07 17:00:24 +00:00
}
}
};
Ok(())
}
}