add sqlite adapter

This commit is contained in:
phiresky 2019-06-07 16:57:11 +02:00
parent e72d9c607b
commit 957e06fc64
4 changed files with 187 additions and 2 deletions

70
Cargo.lock generated
View File

@ -371,6 +371,14 @@ name = "fuchsia-cprng"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "generic-array"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"typenum 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "glob"
version = "0.2.11"
@ -541,6 +549,26 @@ name = "nom"
version = "2.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "num"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"num-complex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"num-integer 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)",
"num-iter 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)",
"num-rational 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-complex"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-integer"
version = "0.1.41"
@ -550,6 +578,25 @@ dependencies = [
"num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-iter"
version = "0.1.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"num-integer 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-rational"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"num-integer 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-traits"
version = "0.2.8"
@ -840,6 +887,7 @@ dependencies = [
"rusqlite 0.18.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)",
"size_format 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"tar 0.4.26 (registry+https://github.com/rust-lang/crates.io-index)",
"tree_magic_fork 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"xz2 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
@ -874,6 +922,7 @@ dependencies = [
"bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"fallible-iterator 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"fallible-streaming-iterator 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"libsqlite3-sys 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)",
"lru-cache 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -949,6 +998,15 @@ dependencies = [
"serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "size_format"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"generic-array 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)",
"num 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "smallvec"
version = "0.6.9"
@ -1048,6 +1106,11 @@ dependencies = [
"petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "typenum"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "ucd-util"
version = "0.1.3"
@ -1246,6 +1309,7 @@ dependencies = [
"checksum flate2 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "f87e68aa82b2de08a6e037f1385455759df6e445a8df5e005b4297191dbf18aa"
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
"checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
"checksum generic-array 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3c0f28c2f5bfb5960175af447a2da7c18900693738343dc896ffbcabd9839592"
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
"checksum humantime 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3ca7e5f2e110db35f93b837c81797f3714500b81d517bf20c431b16d3ca4f114"
"checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e"
@ -1269,7 +1333,11 @@ dependencies = [
"checksum miniz_oxide_c_api 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b7fe927a42e3807ef71defb191dc87d4e24479b221e67015fe38ae2b7b447bab"
"checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945"
"checksum nom 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf51a729ecf40266a2368ad335a5fdde43471f545a967109cd62146ecf8b66ff"
"checksum num 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cf4825417e1e1406b3782a8ce92f4d53f26ec055e3622e1881ca8e9f5f9e08db"
"checksum num-complex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "107b9be86cd2481930688277b675b0114578227f034674726605b8a482d8baf8"
"checksum num-integer 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)" = "b85e541ef8255f6cf42bbfe4ef361305c6c135d10919ecc26126c4e5ae94bc09"
"checksum num-iter 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "76bd5272412d173d6bf9afdf98db8612bbabc9a7a830b7bfc9c188911716132e"
"checksum num-rational 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4e96f040177bb3da242b5b1ecf3f54b5d5af3efbbfb18608977a5d2767b22f10"
"checksum num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "6ba9a427cfca2be13aa6f6403b0b7e7368fe982bfa16fccc450ce74c46cd9b32"
"checksum num_cpus 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1a23f0ed30a54abaa0c7e83b1d2d87ada7c3c23078d1d87815af3e3b6385fbba"
"checksum numtoa 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b8f8bdf33df195859076e54ab11ee78a1b208382d3a26ec40d142ffc1ecc49ef"
@ -1314,6 +1382,7 @@ dependencies = [
"checksum serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)" = "32746bf0f26eab52f06af0d0aa1984f641341d06d8d673c693871da2d188c9be"
"checksum serde_derive 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)" = "46a3223d0c9ba936b61c0d2e3e559e3217dbfb8d65d06d26e8b3c25de38bae3e"
"checksum serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)" = "5a23aa71d4a4d43fdbfaac00eff68ba8a06a51759a89ac3304323e800c4dd40d"
"checksum size_format 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6ed5f6ab2122c6dec69dca18c72fa4590a27e581ad20d44960fe74c032a0b23b"
"checksum smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c4488ae950c49d403731982257768f48fada354a5203fe81f9bb6f43ca9002be"
"checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
"checksum syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)" = "a1393e4a97a19c01e900df2aec855a29f71cf02c402e2f443b8d2747c25c5dbe"
@ -1325,6 +1394,7 @@ dependencies = [
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
"checksum time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f"
"checksum tree_magic_fork 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "aab921ca9b828f83389f3f3c5e77404612547081e5222eb3a23d06184f6813af"
"checksum typenum 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "612d636f949607bdf9b123b4a6f6d966dedf3ff669f7f045890d3a4a73948169"
"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86"
"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"
"checksum unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "141339a08b982d942be2ca06ff8b076563cbe223d1befd5450716790d44e2426"

View File

@ -1,7 +1,7 @@
[package]
name = "rga"
description = "ripgrep, except for pdf, ebooks, Office documents, etc"
description = "ripgrep but for pdf, ebooks, Office documents, archives, etc"
license = "AGPL-3.0-or-later"
version = "0.4.0"
repository = "https://github.com/phiresky/rga"
@ -37,4 +37,5 @@ tar = "0.4.26"
chrono = "0.4.6"
encoding_rs = "0.8.17"
encoding_rs_io = "0.1.6"
rusqlite = "0.18.0"
rusqlite = { version = "0.18.0", features=["vtab"] } # "bundled"
size_format = "1.0.2"

View File

@ -2,6 +2,7 @@ pub mod ffmpeg;
pub mod pandoc;
pub mod poppler;
pub mod spawning;
pub mod sqlite;
pub mod tar;
pub mod zip;
use failure::*;
@ -23,6 +24,7 @@ pub enum Matcher {
pub struct AdapterMeta {
pub name: String,
// version identifier. used to key cache entries, change if your output format changes
pub version: i32,
pub matchers: Vec<Matcher>,
}
@ -67,6 +69,7 @@ pub fn get_adapters() -> Vec<Rc<dyn FileAdapter>> {
Rc::new(poppler::PopplerAdapter),
Rc::new(zip::ZipAdapter),
Rc::new(tar::TarAdapter),
Rc::new(sqlite::SqliteAdapter),
];
adapters
}

111
src/adapters/sqlite.rs Normal file
View File

@ -0,0 +1,111 @@
use super::spawning::map_exe_error;
use super::*;
use failure::*;
use lazy_static::lazy_static;
use rusqlite::types::{ToSql, ValueRef};
use rusqlite::*;
use serde::{Deserialize, Serialize};
use std::convert::TryInto;
use std::io::BufReader;
use std::process::*;
/// File extensions that are routed to the sqlite adapter.
static EXTENSIONS: &[&str] = &["db", "db3", "sqlite", "sqlite3"];

lazy_static! {
    /// Adapter description returned by `SqliteAdapter::metadata`, built once on first use.
    static ref METADATA: AdapterMeta = {
        // One file-extension matcher per entry in EXTENSIONS.
        let matchers: Vec<Matcher> = EXTENSIONS
            .iter()
            .map(|ext| Matcher::FileExtension((*ext).to_owned()))
            .collect();
        AdapterMeta {
            name: String::from("sqlite"),
            version: 1,
            matchers,
        }
    };
}
/// Stateless adapter for sqlite database files.
#[derive(Default)]
pub struct SqliteAdapter;

impl SqliteAdapter {
    /// Creates the adapter; it carries no state.
    pub fn new() -> Self {
        Self
    }
}
impl GetMetadata for SqliteAdapter {
    /// Returns the shared, lazily initialised metadata of the sqlite adapter.
    fn metadata(&self) -> &AdapterMeta {
        // Explicitly deref the lazy_static wrapper to reach the inner AdapterMeta.
        &*METADATA
    }
}
fn format_blob(b: ValueRef) -> String {
use ValueRef::*;
match b {
Null => "NULL".to_owned(),
Integer(i) => format!("{}", i),
Real(i) => format!("{}", i),
Text(i) => format!("'{}'", i.replace("'", "''")),
Blob(b) => format!(
"[blob {}B]",
size_format::SizeFormatterSI::new(
// can't be larger than 2GB anyways
b.len().try_into().unwrap()
)
),
}
}
impl FileAdapter for SqliteAdapter {
    /// Dumps the contents of a sqlite database as text, one line per row.
    ///
    /// Each row is written to `oup` as `<table>: <col>=<value>, <col>=<value>, ...`,
    /// with values rendered by `format_blob`. Databases that are not real files
    /// on disk (`is_real_file == false`) are not read; a single "skipping" notice
    /// is written instead.
    ///
    /// # Errors
    ///
    /// Returns an error if the database cannot be opened read-only, if listing
    /// the tables or querying any table fails, or if writing to `oup` fails.
    fn adapt(&self, ai: AdaptInfo) -> Fallible<()> {
        let AdaptInfo {
            is_real_file,
            filepath_hint,
            oup,
            line_prefix,
            ..
        } = ai;
        if !is_real_file {
            // todo: read to memory and then use that blob if size < max
            writeln!(oup, "{}[rga: skipping sqlite in archive]", line_prefix,)?;
            return Ok(());
        }
        let inp_fname = filepath_hint;
        // Read-only so that scanning a database can never modify it.
        let conn = Connection::open_with_flags(inp_fname, OpenFlags::SQLITE_OPEN_READ_ONLY)?;
        // Propagate errors instead of silently dropping unreadable entries:
        // a partial table list would produce an incomplete dump without any hint.
        let tables: Vec<String> = conn
            .prepare("select name from sqlite_master where type='table'")?
            .query_map(NO_PARAMS, |r| r.get::<_, String>(0))?
            .collect::<rusqlite::Result<Vec<String>>>()?;
        eprintln!("db has {} tables", tables.len());
        for table in tables {
            // can't use query param at that position, so the name is embedded as a
            // quoted identifier: escape_double_quote only doubles embedded `"`, the
            // surrounding quotes are required for names with spaces, punctuation
            // or SQL keywords.
            let mut sel = conn.prepare(&format!(
                "select * from \"{}\"",
                rusqlite::vtab::escape_double_quote(&table)
            ))?;
            let mut z = sel.query(NO_PARAMS)?;
            // Copy the names so they do not keep `z` borrowed while iterating rows.
            let col_names: Vec<String> = z
                .column_names()
                .ok_or_else(|| format_err!("no column names"))?
                .into_iter()
                .map(|e| e.to_owned())
                .collect();
            // The output is lossy (blobs are summarised, values are not CSV-escaped).
            // NOTE(review): rows are written without `line_prefix`; confirm whether
            // that is intended for real files before changing the output format.
            while let Some(row) = z.next()? {
                writeln!(
                    oup,
                    "{}: {}",
                    table,
                    col_names
                        .iter()
                        .enumerate()
                        .map(|(i, e)| format!("{}={}", e, format_blob(row.get_raw(i))))
                        .collect::<Vec<String>>()
                        .join(", ")
                )?;
            }
        }
        Ok(())
    }
}