hyperon/metta/runner/pkg_mgmt/
managed_catalog.rs

1
2use std::path::{Path, PathBuf};
3use std::collections::BTreeMap;
4use std::sync::Mutex;
5
6use git_catalog::{GitCatalog, ModuleGitLocation};
7use crate::metta::runner::*;
8use crate::metta::runner::pkg_mgmt::*;
9
// `ManagedCatalog`: an interface to facilitate direct programmatic management of a catalog, usually
// as a local mirror of one or more remote catalogs, used to insulate a user from upstream changes
12//
13//NOTE FOR THE FUTURE: There are two major problems with this `fetch_newest_for_all`
14// interface.
15// 1. There is no way to know which modules may be deleted from the catalog and which must
16//    be kept.  Therefore it is impossible to simply "upgrade" a module - ie. pulling a
17//    new version and removing the old.
18//
19//    This is because an older version of the module may satisfy a dependency that is not
20//    satisfied by the newer version.  And this object does not have enough visibility to
21//    know.
22//
23// 2. Relatedly, there is no way to automatically fetch the latest module for a given
24//    dependency.  For example, if the catalog has v0.1.3 of a mod, and the upstream
25//    catalog contains v0.2.0 and v0.1.5, there is no way to know which is needed between
26//    those two, in the context of the importer's requirements.
27//
28//PROPOSAL: Requirement API.  A ManagedCatalog would need to track which requirements are
29// satisfied by each module, so that if a requirement were better satisfied by another
30// module then the old module could be removed.
31//
32// There are a number of unanswered questions however:
33// - How should the managed catalog interact with modules from other catalogs? Should
34//  the managed catalog track dependencies outside the upstream catalog?  A module from
35//  any catalog can theoretically satisfy a dependency so what happens if a local dir
36//  catalog mod satisfies a sub-dependency, but a newer version of the mod exists on the
37//  remote catalog?
38// - How will the managed catalog logic work with regard to the sat-set solving?
39//   See "QUESTION on shared base dependencies".  In other words, the best dependency mod
40//   in isolation might not be the best when considered holistically.  The Requirement API
41//   needs to take that into account.
42//
43
/// Indicates the desired behavior for updating the locally-cached module
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum UpdateMode {
    /// Fetches the module if it doesn't exist locally, otherwise leaves the cached copy alone
    FetchIfMissing,
    /// Attempts to fetch from the remote catalog if the locally cached version is older
    /// than the specified number of seconds.  Otherwise continues with the cached mod
    TryFetchIfOlderThan(u64),
    /// Attempts to fetch from the remote catalog.  Continues with the existing module
    /// if the remote is unavailable
    TryFetchLatest,
    /// Fetches the latest from the remote catalog.  Fails if the remote is unavailable
    FetchLatest,
}

impl UpdateMode {
    /// Returns the more aggressive (more likely to fetch) of the two modes
    ///
    /// The modes are ranked `FetchIfMissing` < `TryFetchIfOlderThan` < `TryFetchLatest` <
    /// `FetchLatest`.  When both modes are `TryFetchIfOlderThan`, the result carries the
    /// smaller of the two age thresholds.
    pub fn promote_to(self, other: Self) -> Self {
        match self {
            //The least aggressive mode always yields to whatever it is combined with
            Self::FetchIfMissing => other,
            Self::TryFetchIfOlderThan(own_secs) => match other {
                Self::FetchIfMissing => self,
                Self::TryFetchIfOlderThan(other_secs) => Self::TryFetchIfOlderThan(own_secs.min(other_secs)),
                Self::TryFetchLatest | Self::FetchLatest => other,
            },
            Self::TryFetchLatest => match other {
                Self::FetchLatest => Self::FetchLatest,
                _ => Self::TryFetchLatest,
            },
            //Nothing is more aggressive than an unconditional fetch
            Self::FetchLatest => Self::FetchLatest,
        }
    }
}
73
74pub trait ManagedCatalog: ModuleCatalog {
75
76    /// Clears all locally stored modules, resetting the local catalog to an empty state
77    fn clear_all(&self) -> Result<(), String>;
78
79    /// Fetch a specific module from the UpstreamCatalog.  Returns `Ok(())`` if the module
80    /// already exists in the catalog
81    ///
82    /// NOTE: This method will likely become async in the future
83    fn fetch(&self, descriptor: &ModuleDescriptor, update_mode: UpdateMode) -> Result<(), String>;
84
85    /// Remove a specific module from the catalog
86    fn remove(&self, descriptor: &ModuleDescriptor) -> Result<(), String>;
87
88    /// AKA "upgrade".  Fetches the newest version for each module that currently exists in
89    /// the catalog
90    ///
91    /// NOTE: This API will likely change in the future.  See "NOTE FOR THE FUTURE" in comments
92    /// for `ManagedCatalog`
93    fn fetch_newest_for_all(&self, update_mode: UpdateMode) -> Result<(), String> {
94        self.sync_toc(update_mode)?;
95        let iter = self.list_name_uid_pairs()
96            .ok_or_else(|| "managed catalog must support `list` method".to_string())?;
97        for (name, uid) in iter {
98            if let Some(desc) = self.lookup_newest_with_uid_and_version_req(&name, uid, None) {
99                self.fetch(&desc, update_mode)?;
100            }
101        }
102        Ok(())
103    }
104}
105
106/// A ManagedCatalog type to mediate and aggregate the contents of one or more "upstream" catalogs
107///
108/// ## Division of responsibilities with upstream catalogs
109///
110/// LocalCatalog:
111/// - Tracks which modules are installed on-disk
112/// - Manages the on-disk location of the downloaded / cached modules
113/// - TODO-Future: will track requirements and module dependency trees
114///
115/// upstream Catalogs:
116/// - Track which modules are available from the outside world
117/// - Track the remote location of each module and any parameters needed to access it
118///
119#[derive(Debug)]
120pub struct LocalCatalog {
121    name: String,
122    upstream_catalogs: Vec<Box<dyn ModuleCatalog>>,
123    storage_dir: PathBuf,
124    local_toc: Mutex<LocalCatalogTOC>,
125}
126
127impl LocalCatalog {
128    pub fn new(caches_dir: &Path, name: &str) -> Result<Self, String> {
129        let storage_dir = caches_dir.join(name);
130        let local_toc = LocalCatalogTOC::build_from_dir(&storage_dir)?;
131
132        Ok(Self {
133            name: name.to_string(),
134            upstream_catalogs: vec![],
135            storage_dir,
136            local_toc: Mutex::new(local_toc),
137        })
138    }
139    pub fn push_upstream_catalog(&mut self, catalog: Box<dyn ModuleCatalog>) {
140        self.upstream_catalogs.push(catalog);
141    }
142    pub fn upstream_catalogs(&self) -> &[Box<dyn ModuleCatalog>] {
143        &self.upstream_catalogs[..]
144    }
145    /// Returns an accessor for the first upstream [GitCatalog] if the LocalCatalog has one,
146    /// otherwise returns None
147    fn first_upstream_git_catalog(&self) -> Option<&GitCatalog> {
148        for upstream in self.upstream_catalogs() {
149            if let Some(git_catalog) = upstream.downcast::<GitCatalog>() {
150                return Some(git_catalog)
151            }
152        }
153        None
154    }
155    /// Adds a specific module into the catalog based on a [ModuleGitLocation]
156    ///
157    /// Returns an error if the LocalCatalog is not capable of working with git modules
158    pub(crate) fn loader_for_explicit_git_module(&self, mod_name: &str, update_mode: UpdateMode, location: &ModuleGitLocation) -> Result<Option<(Box<dyn ModuleLoader>, ModuleDescriptor)>, String> {
159        let descriptor = self.first_upstream_git_catalog()
160            .ok_or_else(|| format!("Catalog {} cannot pull modules from git", self.name))?
161            .register_mod(mod_name, None, location)?;
162        let loader = self.get_loader_with_explicit_refresh(&descriptor, update_mode)?;
163        Ok(Some((loader, descriptor)))
164    }
165    fn lookup_by_name_in_toc(&self, name: &str) -> Option<Vec<ModuleDescriptor>> {
166        let local_toc = self.local_toc.lock().unwrap();
167        local_toc.lookup_by_name(name)
168    }
169    /// Adds the [ModuleDescriptor] to the TOC if it doesn't exist.  Won't create duplicates
170    fn add_to_toc(&self, descriptor: ModuleDescriptor) -> Result<(), String> {
171        let mut local_toc = self.local_toc.lock().unwrap();
172        local_toc.add_descriptor(descriptor)
173    }
174    fn list_toc(&self) -> Vec<ModuleDescriptor> {
175        let local_toc = self.local_toc.lock().unwrap();
176        local_toc.all_sorted_descriptors()
177    }
178    pub(crate) fn get_loader_with_explicit_refresh(&self, descriptor: &ModuleDescriptor, update_mode: UpdateMode) -> Result<Box<dyn ModuleLoader>, String> {
179
180        //Figure out which upstream catalog furnished this descriptor by trying each one
181        let mut upstream_loader = None;
182        for upstream in self.upstream_catalogs.iter() {
183            match upstream.get_loader(descriptor) {
184                Ok(loader) => {
185                    upstream_loader = Some(loader);
186                    break
187                },
188                Err(_) => {}
189            }
190        }
191        let upstream_loader = match upstream_loader {
192            Some(loader) => loader,
193            None => {
194                // TODO: It would be nice to have the option here to pull a different but compatible
195                // mod from the upstream catalogs; however we don't have the original requirement info,
196                // so currently we cannot do that.  See the write-up above about the "Requirement API".
197                return Err(format!("Upstream Catalogs can no longer supply module \"{descriptor}\""));
198            }
199        };
200
201        //Resolve the local dir to use as the local cache
202        let cache_dir_name = dir_name_from_descriptor(descriptor);
203        let local_cache_dir = self.storage_dir.join(cache_dir_name);
204
205        //Make sure this mod is in the TOC
206        self.add_to_toc(descriptor.to_owned())?;
207
208        //Wrap the upstream loader in a loader object from this catalog
209        let wrapper_loader = LocalCatalogLoader {local_cache_dir, upstream_loader, update_mode};
210        Ok(Box::new(wrapper_loader))
211    }
212}
213
214impl ModuleCatalog for LocalCatalog {
215    fn display_name(&self) -> String {
216        self.name.clone()
217    }
218    fn lookup(&self, name: &str) -> Vec<ModuleDescriptor> {
219
220        //If we have some matching modules in the local cache then return them
221        if let Some(descriptors) = self.lookup_by_name_in_toc(name) {
222            return descriptors;
223        }
224
225        //If we don't have anything locally, check the upstream catalogs in order until one
226        // of them returns some results
227        for upstream in self.upstream_catalogs.iter() {
228            let upstream_results = upstream.lookup(name);
229            if upstream_results.len() > 0 {
230                return upstream_results;
231            }
232        }
233
234        //We didn't find any matching modules, locally or upstream 
235        vec![]
236    }
237    fn get_loader(&self, descriptor: &ModuleDescriptor) -> Result<Box<dyn ModuleLoader>, String> {
238        self.get_loader_with_explicit_refresh(descriptor, UpdateMode::FetchIfMissing)
239    }
240    fn list<'a>(&'a self) -> Option<Box<dyn Iterator<Item=ModuleDescriptor> + 'a>> {
241        Some(Box::new(self.list_toc().into_iter()))
242    }
243    fn sync_toc(&self, update_mode: UpdateMode) -> Result<(), String> {
244        for upstream in self.upstream_catalogs.iter() {
245            upstream.sync_toc(update_mode)?;
246        }
247        Ok(())
248    }
249    fn as_managed(&self) -> Option<&dyn ManagedCatalog> {
250        Some(self)
251    }
252}
253
254/// A [ModuleLoader] for a [LocalCatalog] that wraps another ModuleLoader for an upstream [ModuleCatalog]
255#[derive(Debug)]
256struct LocalCatalogLoader {
257    local_cache_dir: PathBuf,
258    update_mode: UpdateMode,
259    upstream_loader: Box<dyn ModuleLoader>
260}
261
262impl ModuleLoader for LocalCatalogLoader {
263    fn prepare(&self, _local_dir: Option<&Path>, update_mode: UpdateMode) -> Result<Option<Box<dyn ModuleLoader>>, String> {
264        let update_mode = self.update_mode.promote_to(update_mode);
265        self.upstream_loader.prepare(Some(&self.local_cache_dir), update_mode)
266    }
267    fn load(&self, _context: &mut RunContext) -> Result<(), String> {
268        unreachable!() //We will substitute the `upstream_loader` during prepare
269    }
270}
271
272impl ManagedCatalog for LocalCatalog {
273    fn clear_all(&self) -> Result<(), String> {
274        if self.storage_dir.is_dir() {
275            std::fs::remove_dir_all(&self.storage_dir).map_err(|e| e.to_string())?;
276        }
277        let mut local_toc = self.local_toc.lock().unwrap();
278        *local_toc = LocalCatalogTOC::build_from_dir(&self.storage_dir)?;
279        Ok(())
280    }
281    fn fetch(&self, descriptor: &ModuleDescriptor, update_mode: UpdateMode) -> Result<(), String> {
282        let loader = self.get_loader_with_explicit_refresh(descriptor, update_mode)?;
283        let _ = loader.prepare(None, update_mode)?;
284        Ok(())
285    }
286    fn remove(&self, descriptor: &ModuleDescriptor) -> Result<(), String> {
287        let cache_dir_name = dir_name_from_descriptor(descriptor);
288        let mod_cache_dir = self.storage_dir.join(cache_dir_name);
289        if mod_cache_dir.is_dir() {
290            std::fs::remove_dir_all(mod_cache_dir).map_err(|e| e.to_string())?;
291            let mut local_toc = self.local_toc.lock().unwrap();
292            local_toc.remove_descriptor(descriptor)
293        } else {
294            Err("No such module in catalog".to_string())
295        }
296    }
297    fn fetch_newest_for_all(&self, update_mode: UpdateMode) -> Result<(), String> {
298        self.sync_toc(update_mode)?;
299        let iter = self.list_name_uid_pairs()
300            .ok_or_else(|| "managed catalog must support `list` method".to_string())?;
301        for (name, uid) in iter {
302
303            //Find the newest version of the mod in each upstream catalog
304            let upstream_bests: Vec<ModuleDescriptor> = self.upstream_catalogs.iter().filter_map(|upstream| {
305                upstream.lookup_newest_with_uid_and_version_req(&name, uid, None) 
306            }).collect();
307            if let Some(newest_desc) = find_newest_module(upstream_bests.into_iter()) {
308                self.fetch(&newest_desc, update_mode)?;
309            }
310        }
311        Ok(())
312    }
313}
314
315/// A Table of Contents (TOC) for a LocalCatalog
316#[derive(Debug)]
317struct LocalCatalogTOC {
318    mods_by_name: BTreeMap<String, Vec<ModuleDescriptor>>
319}
320
321impl LocalCatalogTOC {
322    /// Scans a directory and builds up a TOC from the contents
323    fn build_from_dir(storage_dir: &Path) -> Result<Self, String> {
324        if !storage_dir.exists() {
325            std::fs::create_dir_all(&storage_dir).map_err(|e| e.to_string())?;
326        } else {
327            if !storage_dir.is_dir() {
328                return Err(format!("Found file instead of directory at {}", storage_dir.display()));
329            }
330        }
331
332        let mut new_self = Self {
333            mods_by_name: BTreeMap::new()
334        };
335
336        for dir_item_handle in std::fs::read_dir(storage_dir).map_err(|e| e.to_string())? {
337            let dir_entry = dir_item_handle.map_err(|e| e.to_string())?;
338            let file_name = dir_entry.file_name();
339            let name_str = file_name.to_str()
340                .ok_or_else(|| format!("Invalid characters found in local cache at path: {}", dir_entry.path().display()))?;
341
342            if !Self::should_ignore_dir_entry(name_str) {
343                let descriptor = parse_descriptor_from_dir_name(name_str)?;
344                new_self.add_descriptor(descriptor)?;
345            }
346        }
347
348        Ok(new_self)
349    }
350    /// Returns `false` if the file / directory name is not a module entry, otherwise `true` if it could be
351    fn should_ignore_dir_entry(dir_name: &str) -> bool {
352        // GitCatalog reserves the names "_catalog.repo" and "_catalog.json"
353        if dir_name == "_catalog.repo" || dir_name == "_catalog.json" {
354            return true;
355        }
356        // '.' is illegal in a module name, but lots of software creates .invisible_files, e.g. `.DS_Store`
357        if dir_name.starts_with('.') {
358            return true;
359        }
360        false
361    }
362    fn lookup_by_name(&self, name: &str) -> Option<Vec<ModuleDescriptor>> {
363        if let Some(descriptors) = self.mods_by_name.get(name) {
364            if descriptors.len() > 0 {
365                return Some(descriptors.clone());
366            }
367        }
368        None
369    }
370    /// Returns a Vec containing all ModuleDescriptors in the TOC, sorted by name
371    fn all_sorted_descriptors(&self) -> Vec<ModuleDescriptor> {
372        self.mods_by_name.iter().flat_map(|(_name, desc_vec)| desc_vec).cloned().collect()
373    }
374    /// Adds a descriptor to a TOC.  Won't add a duplicate
375    fn add_descriptor(&mut self, descriptor: ModuleDescriptor) -> Result<(), String> {
376        let desc_vec = self.mods_by_name.entry(descriptor.name().to_owned()).or_insert(vec![]);
377        if !desc_vec.contains(&descriptor) {
378            desc_vec.push(descriptor);
379            desc_vec.sort_by(|a, b| a.version().cmp(&b.version()));
380        }
381        Ok(())
382    }
383    fn remove_descriptor(&mut self, descriptor: &ModuleDescriptor) -> Result<(), String> {
384        fn ret_err() -> Result<(), String> { Err("No such module in catalog".to_string()) }
385        match self.mods_by_name.get_mut(descriptor.name()) {
386            Some(desc_vec) => {
387                match desc_vec.iter().position(|vec_desc| vec_desc==descriptor) {
388                    Some(idx) => {
389                        desc_vec.remove(idx);
390                        Ok(())
391                    },
392                    None => ret_err()
393                }
394            },
395            None => ret_err()
396        }
397    }
398}
399
400/// Returns a String that can be used as a directory to cache local files associated
401/// with the module, such as build artifacts and/or downloads
402pub(crate) fn dir_name_from_descriptor(desc: &ModuleDescriptor) -> String {
403    let mod_dir_name = match desc.version() {
404        Some(version) => format!("{}@{version}", desc.name()),
405        None => desc.name().to_string()
406    };
407    match desc.uid() {
408        Some(uid) => format!("{mod_dir_name}#{uid:016x}"),
409        None => format!("{mod_dir_name}")
410    }
411}
412
413/// Performs the inverse of [dir_name_from_descriptor], deconstructing a dir_name str into a [ModuleDescriptor]
414pub(crate) fn parse_descriptor_from_dir_name(dir_name: &str) -> Result<ModuleDescriptor, String> {
415    let (name_and_vers, uid) = match dir_name.rfind('#') {
416        Some(pos) => (&dir_name[0..pos], Some(&dir_name[pos+1..])),
417        None => (dir_name, None)
418    };
419    let (name, version) = match name_and_vers.find('@') {
420        Some(pos) => (&name_and_vers[0..pos], Some(&name_and_vers[pos+1..])),
421        None => (name_and_vers, None)
422    };
423    let version = match version {
424        Some(ver_str) => Some(semver::Version::parse(ver_str).map_err(|e| e.to_string())?),
425        None => None
426    };
427    let uid = match uid {
428        Some(uid_str) => Some(u64::from_str_radix(uid_str, 16).map_err(|e| e.to_string())?),
429        None => None
430    };
431    Ok(ModuleDescriptor::new(name.to_string(), version, uid))
432}