forked from mirrors/gecko-dev
267 lines
11 KiB
Rust
267 lines
11 KiB
Rust
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
use super::CollectionRequest;
|
|
use crate::bso::{IncomingBso, OutgoingBso};
|
|
use crate::client_types::ClientData;
|
|
use crate::{telemetry, CollectionName, Guid, ServerTimestamp};
|
|
use anyhow::Result;
|
|
use std::fmt;
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
pub struct CollSyncIds {
|
|
pub global: Guid,
|
|
pub coll: Guid,
|
|
}
|
|
|
|
/// Defines how an engine is associated with a particular set of records
|
|
/// on a sync storage server. It's either disconnected, or believes it is
|
|
/// connected with a specific set of GUIDs. If the server and the engine don't
|
|
/// agree on the exact GUIDs, the engine will assume something radical happened
|
|
/// so it can't believe anything it thinks it knows about the state of the
|
|
/// server (ie, it will "reset" then do a full reconcile)
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
pub enum EngineSyncAssociation {
|
|
/// This store is disconnected (although it may be connected in the future).
|
|
Disconnected,
|
|
/// Sync is connected, and has the following sync IDs.
|
|
Connected(CollSyncIds),
|
|
}
|
|
|
|
/// The concrete `SyncEngine` implementations
|
|
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
|
pub enum SyncEngineId {
|
|
// Note that we've derived PartialOrd etc, which uses lexicographic ordering
|
|
// of the variants. We leverage that such that the higher priority engines
|
|
// are listed first.
|
|
// This order matches desktop.
|
|
Passwords,
|
|
Tabs,
|
|
Bookmarks,
|
|
Addresses,
|
|
CreditCards,
|
|
History,
|
|
}
|
|
|
|
impl SyncEngineId {
|
|
// Iterate over all possible engines. Note that we've made a policy decision
|
|
// that this should enumerate in "order" as defined by PartialCmp, and tests
|
|
// enforce this.
|
|
pub fn iter() -> impl Iterator<Item = SyncEngineId> {
|
|
[
|
|
Self::Passwords,
|
|
Self::Tabs,
|
|
Self::Bookmarks,
|
|
Self::Addresses,
|
|
Self::CreditCards,
|
|
Self::History,
|
|
]
|
|
.into_iter()
|
|
}
|
|
|
|
// Get the string identifier for this engine. This must match the strings in SyncEngineSelection.
|
|
pub fn name(&self) -> &'static str {
|
|
match self {
|
|
Self::Passwords => "passwords",
|
|
Self::History => "history",
|
|
Self::Bookmarks => "bookmarks",
|
|
Self::Tabs => "tabs",
|
|
Self::Addresses => "addresses",
|
|
Self::CreditCards => "creditcards",
|
|
}
|
|
}
|
|
}
|
|
|
|
impl fmt::Display for SyncEngineId {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
write!(f, "{}", self.name())
|
|
}
|
|
}
|
|
|
|
impl TryFrom<&str> for SyncEngineId {
|
|
type Error = String;
|
|
|
|
fn try_from(value: &str) -> std::result::Result<Self, Self::Error> {
|
|
match value {
|
|
"passwords" => Ok(Self::Passwords),
|
|
"history" => Ok(Self::History),
|
|
"bookmarks" => Ok(Self::Bookmarks),
|
|
"tabs" => Ok(Self::Tabs),
|
|
"addresses" => Ok(Self::Addresses),
|
|
"creditcards" => Ok(Self::CreditCards),
|
|
_ => Err(value.into()),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A "sync engine" is a thing that knows how to sync. It's often implemented
|
|
/// by a "store" (which is the generic term responsible for all storage
|
|
/// associated with a component, including storage required for sync.)
|
|
///
|
|
/// The model described by this trait is that engines first "stage" sets of incoming records,
|
|
/// then apply them returning outgoing records, then handle the success (or otherwise) of each
|
|
/// batch as it's uploaded.
|
|
///
|
|
/// Staging incoming records is (or should be ;) done in batches - eg, 1000 record chunks.
|
|
/// Some engines will "stage" these into a database temp table, while ones expecting less records
|
|
/// might just store them in memory.
|
|
///
|
|
/// For outgoing records, a single vec is supplied by the engine. The sync client will use the
|
|
/// batch facilities of the server to make multiple POST requests and commit them.
|
|
/// Sadly it's not truly atomic (there's a batch size limit) - so the model reflects that in that
|
|
/// the engine gets told each time a batch is committed, which might happen more than once for the
|
|
/// supplied vec. We should upgrade this model so the engine can avoid reading every outgoing
|
|
/// record into memory at once (ie, we should try and better reflect the upload batch model at
|
|
/// this level)
|
|
///
|
|
/// Sync Engines should not assume they live for exactly one sync, so `prepare_for_sync()` should
|
|
/// clean up any state, including staged records, from previous syncs.
|
|
///
|
|
/// Different engines will produce errors of different types. To accommodate
|
|
/// this, we force them all to return anyhow::Error.
|
|
pub trait SyncEngine {
|
|
fn collection_name(&self) -> CollectionName;
|
|
|
|
/// Prepares the engine for syncing. The tabs engine currently uses this to
|
|
/// store the current list of clients, which it uses to look up device names
|
|
/// and types.
|
|
///
|
|
/// Note that this method is only called by `sync_multiple`, and only if a
|
|
/// command processor is registered. In particular, `prepare_for_sync` will
|
|
/// not be called if the store is synced using `sync::synchronize` or
|
|
/// `sync_multiple::sync_multiple`. It _will_ be called if the store is
|
|
/// synced via the Sync Manager.
|
|
///
|
|
/// TODO(issue #2590): This is pretty cludgey and will be hard to extend for
|
|
/// any case other than the tabs case. We should find another way to support
|
|
/// tabs...
|
|
fn prepare_for_sync(&self, _get_client_data: &dyn Fn() -> ClientData) -> Result<()> {
|
|
Ok(())
|
|
}
|
|
|
|
/// Tells the engine what the local encryption key is for the data managed
|
|
/// by the engine. This is only used by collections that store data
|
|
/// encrypted locally and is unrelated to the encryption used by Sync.
|
|
/// The intent is that for such collections, this key can be used to
|
|
/// decrypt local data before it is re-encrypted by Sync and sent to the
|
|
/// storage servers, and similarly, data from the storage servers will be
|
|
/// decrypted by Sync, then encrypted by the local encryption key before
|
|
/// being added to the local database.
|
|
///
|
|
/// The expectation is that the key value is being maintained by the
|
|
/// embedding application in some secure way suitable for the environment
|
|
/// in which the app is running - eg, the OS "keychain". The value of the
|
|
/// key is implementation dependent - it is expected that the engine and
|
|
/// embedding application already have some external agreement about how
|
|
/// to generate keys and in what form they are exchanged. Finally, there's
|
|
/// an assumption that sync engines are short-lived and only live for a
|
|
/// single sync - this means that sync doesn't hold on to the key for an
|
|
/// extended period. In practice, all sync engines which aren't a "bridged
|
|
/// engine" are short lived - we might need to rethink this later if we need
|
|
/// engines with local encryption keys to be used on desktop.
|
|
///
|
|
/// This will panic if called by an engine that doesn't have explicit
|
|
/// support for local encryption keys as that implies a degree of confusion
|
|
/// which shouldn't be possible to ignore.
|
|
fn set_local_encryption_key(&mut self, _key: &str) -> Result<()> {
|
|
unimplemented!("This engine does not support local encryption");
|
|
}
|
|
|
|
/// Stage some incoming records. This might be called multiple times in the same sync
|
|
/// if we fetch the incoming records in batches.
|
|
///
|
|
/// Note there is no timestamp provided here, because the procedure for fetching in batches
|
|
/// means that the timestamp advancing during a batch means we must abort and start again.
|
|
/// The final collection timestamp after staging all records is supplied to `apply()`
|
|
fn stage_incoming(
|
|
&self,
|
|
inbound: Vec<IncomingBso>,
|
|
telem: &mut telemetry::Engine,
|
|
) -> Result<()>;
|
|
|
|
/// Apply the staged records, returning outgoing records.
|
|
/// Ideally we would adjust this model to better support batching of outgoing records
|
|
/// without needing to keep them all in memory (ie, an iterator or similar?)
|
|
fn apply(
|
|
&self,
|
|
timestamp: ServerTimestamp,
|
|
telem: &mut telemetry::Engine,
|
|
) -> Result<Vec<OutgoingBso>>;
|
|
|
|
/// Indicates that the given record IDs were uploaded successfully to the server.
|
|
/// This may be called multiple times per sync, once for each batch. Batching is determined
|
|
/// dynamically based on payload sizes and counts via the server's advertised limits.
|
|
fn set_uploaded(&self, new_timestamp: ServerTimestamp, ids: Vec<Guid>) -> Result<()>;
|
|
|
|
/// Called once the sync is finished. Not currently called if uploads fail (which
|
|
/// seems sad, but the other batching confusion there needs sorting out first).
|
|
/// Many engines will have nothing to do here, as most "post upload" work should be
|
|
/// done in `set_uploaded()`
|
|
fn sync_finished(&self) -> Result<()> {
|
|
Ok(())
|
|
}
|
|
|
|
/// The engine is responsible for building a single collection request. Engines
|
|
/// typically will store a lastModified timestamp and use that to build a
|
|
/// request saying "give me full records since that date" - however, other
|
|
/// engines might do something fancier. It can return None if the server timestamp
|
|
/// has not advanced since the last sync.
|
|
/// This could even later be extended to handle "backfills", and we might end up
|
|
/// wanting one engine to use multiple collections (eg, as a "foreign key" via guid), etc.
|
|
fn get_collection_request(
|
|
&self,
|
|
server_timestamp: ServerTimestamp,
|
|
) -> Result<Option<CollectionRequest>>;
|
|
|
|
/// Get persisted sync IDs. If they don't match the global state we'll be
|
|
/// `reset()` with the new IDs.
|
|
fn get_sync_assoc(&self) -> Result<EngineSyncAssociation>;
|
|
|
|
/// Reset the engine (and associated store) without wiping local data,
|
|
/// ready for a "first sync".
|
|
/// `assoc` defines how this store is to be associated with sync.
|
|
fn reset(&self, assoc: &EngineSyncAssociation) -> Result<()>;
|
|
|
|
/// Wipes the engine's data
|
|
/// This is typically triggered by a client command, which at the time of writing, only
|
|
/// supported wiping bookmarks.
|
|
///
|
|
/// This panics if triggered on a sync engine that does not explicitly implement wipe, because
|
|
/// that implies a confustion that shouldn't occur.
|
|
fn wipe(&self) -> Result<()> {
|
|
unimplemented!("The engine does not implement wipe, no wipe should be requested")
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod test {
|
|
use super::*;
|
|
use std::iter::zip;
|
|
|
|
#[test]
|
|
fn test_engine_priority() {
|
|
fn sorted(mut engines: Vec<SyncEngineId>) -> Vec<SyncEngineId> {
|
|
engines.sort();
|
|
engines
|
|
}
|
|
assert_eq!(
|
|
vec![SyncEngineId::Passwords, SyncEngineId::Tabs],
|
|
sorted(vec![SyncEngineId::Passwords, SyncEngineId::Tabs])
|
|
);
|
|
assert_eq!(
|
|
vec![SyncEngineId::Passwords, SyncEngineId::Tabs],
|
|
sorted(vec![SyncEngineId::Tabs, SyncEngineId::Passwords])
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn test_engine_enum_order() {
|
|
let unsorted = SyncEngineId::iter().collect::<Vec<SyncEngineId>>();
|
|
let mut sorted = SyncEngineId::iter().collect::<Vec<SyncEngineId>>();
|
|
sorted.sort();
|
|
|
|
// iterating should supply identical elements in each.
|
|
assert!(zip(unsorted, sorted).fold(true, |acc, (a, b)| acc && (a == b)))
|
|
}
|
|
}
|