hyperon/metta/runner/pkg_mgmt/
catalog.rs

1//!
2//! # Module Resolution
3//!
4//! ## Behavior of Module Resolution
5//!
6//! ```text
7//!       ┌────────────────────┐           ⎽⎼⎻⎺ ⎺⎺⎺ ⎺⎻⎼⎽                    ⎽⎼⎻⎺ ⎺⎺⎺ ⎺⎻⎼⎽
8//!      ╱                    ╱       ⎽⎼⎻⎺  pkg-info in  ⎺⎻⎼⎽ Yes      ⎽⎼⎻⎺pkg-info entry ⎺⎻⎼⎽ No
9//!     ╱      (import!)     ╱─────►<   &self has entry for   >─────►<   has fs_path attrib?   >───┐
10//!    ╱                    ╱         ⎺⎻⎼⎽    module?    ⎽⎼⎻⎺          ⎺⎻⎼⎽               ⎽⎼⎻⎺     │
11//!   └────────────────────┘               ⎺⎻⎼⎽ ⎽⎽⎽ ⎽⎼⎻⎺                    ⎺⎻⎼⎽ ⎽⎽⎽ ⎽⎼⎻⎺          │
12//!                                              │ No                             │ Yes            │
13//!  ┌─────────────────────────┐     ┌───────────▼─────────────┐      /───────────▼─────────────\  │
14//!  │  Query ModuleCatalogs   │     │    Assume any module    │      │    Load the module at   │  │
15//!  │     in order, with      │◄──┬─┤  version will satisfy   │      │   the file-system path  │  │
16//!  │   version requirement   │   │ │       dependency        │      │   with first successful │  │
17//!  │                         │   │ │                         │      │      FsModuleFormat     │  │
18//!  └───────────┬─────────────┘   │ └───────────▲─────────────┘      \───────────▲─────────────/  │
19//!              │                 │             │                                │                │
20//!  /───────────▼─────────────\   │             │                    ┌───────────┴─────────────┐  │
21//!  │  Load the module from   │   │             │                    │    clone module from    │  │
22//!  │   the first catalog     │   │             │                    │     remote repo to      │  │
23//!  │     that reports a      │   │             │                    │    local resource dir   │  │
24//!  │    successful match     │   │             │                    │                         │  │
25//!  \─────────────────────────/   │             │                    └───────────▲─────────────┘  │
26//!                                │             │ No                             │ Yes            │
27//!                                │Yes    ⎽⎼⎻⎺ ⎺⎺⎺ ⎺⎻⎼⎽                    ⎽⎼⎻⎺ ⎺⎺⎺ ⎺⎻⎼⎽          │
28//!                                │  ⎽⎼⎻⎺pkg-info entry ⎺⎻⎼⎽       No ⎽⎼⎻⎺pkg-info entry ⎺⎻⎼⎽     │
29//!                                └<   has version attrib?   >◄─────<     has git attrib?     >───┘
30//!                                   ⎺⎻⎼⎽               ⎽⎼⎻⎺          ⎺⎻⎼⎽               ⎽⎼⎻⎺
31//!                                        ⎺⎻⎼⎽ ⎽⎽⎽ ⎽⎼⎻⎺                    ⎺⎻⎼⎽ ⎽⎽⎽ ⎽⎼⎻⎺
32//! ```
33//!
34
35//LP-TODO-NEXT make a test to make sure circular imports are caught and don't lead to infinite recursion
//QUESTION: Should circular imports between modules be allowed?  The current implementation (and possibly
// the MeTTa language itself) disallows circular imports because there is no concept of forward declaration.
38// It *may* be possible to pre-parse the language in order to make recursive imports possible, but I have
39// not yet thought in detail about this.
40//
41
42//QUESTION on shared base dependencies & sat-set solving:
43//The currently implemented design resolves each module's dependencies in a straightforward depth-first
44//  order.  This is possible because the module system allows multiple instances of the same module to
45//  be loaded simultaneously.  So each module can pick its best dependencies based on its pkg-info and
46//  the available catalogs.
47//However, consider the following situation:
48//  ModA depends on ModI for some interface types (for example a special String type)
49//  ModB depends on ModI for the same interface types, but ModA and ModB don't know about each other
50//  ModTop depends on both ModA and ModB, and uses functionality in ModA to create some objects that
51//   it expects ModB to be able to use.  Therefore the system must guarantee the same version of ModI
52//   gets imported by both ModA and ModB.
53//This is precisely the opposite behavior from the ability of a module to carry around "private"
54//  dependencies and know that those dependencies will always be loaded, and they won't be substituted
55//  for another version.
56//
57//I see several possible solutions:
58// 1.) We could disallow private dependencies altogether.  This is the approach taken by Cargo.
59//  However this contravenes some of the desiderata outlined in this issue:
60//  https://github.com/trueagi-io/hyperon-experimental/issues/470
61// 2.) We could require explicit re-exporting of a dependency module used in a module's interface, which
62//   would give the implementation an opportunity to find dependency module versions that work for
63//   all other modules that use them in common.  ie. solve for the sat set.  Optionally, with this approach,
64//   the module could also opt to re-export a private dependency as part of itself, making the interface
65//   between ModA and ModB in the example deliberately incompatible. 
66// 3.) We could require private dependencies to be explicitly specified as private in the pkg-info.  With
67//  the default behavior being a single module for each module name.  This might be a reasonable compromise
68//  between 1 & 2, however we would likely need some form of linting, so that a user doesn't shoot
69//  themselves in the foot by exporting an interface that includes items from a private dependency
70//
71// I think my personal preference is for #2.
72
73use core::any::Any;
74use std::path::Path;
75use std::collections::hash_map::DefaultHasher;
76use std::hash::Hasher;
77use std::ffi::{OsStr, OsString};
78use std::collections::HashSet;
79
80use crate::metta::runner::modules::*;
81use crate::metta::runner::{*, git_catalog::*};
82
83use xxhash_rust::xxh3::xxh3_64;
84use serde::{Deserialize, Serialize};
85
86/// Implemented for types capable of locating MeTTa modules
87///
88/// For example, `ModuleCatalog` would be an interface to a module respository, analogous to `PyPI` or
89/// `crates.io` but `ModuleCatalog` is also implemented for [Path] because any file system directory may be
90/// capable of storing and indexing MeTTa modules.
91///
92/// `ModuleCatalog` types are closely connected with [ModuleLoader] types because the `ModuleCatalog` must
93/// recognize the module in whatever media it exists, and supply the `ModuleLoader` to load that module
94pub trait ModuleCatalog: std::fmt::Debug + Send + Sync {
95    /// The name of the catalog, to be displayed to the user
96    fn display_name(&self) -> String {
97        std::any::type_name::<Self>().to_string()
98    }
99
100    /// Returns the [ModuleDescriptor] for every module in the `ModuleCatalog` with the specified name
101    fn lookup(&self, name: &str) -> Vec<ModuleDescriptor>;
102
103    /// Returns the [ModuleDescriptor] for every module in the `ModuleCatalog` with the specified name,
104    ///   and uid match
105    fn lookup_with_uid(&self, name: &str, uid: Option<u64>) -> Vec<ModuleDescriptor> {
106        self.lookup(name).into_iter().filter(|desc| desc.uid == uid).collect()
107    }
108
109    /// Returns the [ModuleDescriptor] for every module in the `ModuleCatalog` with the specified name
110    /// matching the version requirements
111    ///
112    /// NOTE: Unversioned modules will never match any version_req, so this method should never return
113    /// any un-versioned ModuleDescriptors if `version_req.is_some()`
114    fn lookup_with_version_req(&self, name: &str, version_req: Option<&semver::VersionReq>) -> Vec<ModuleDescriptor> {
115        filter_by_version_req(self.lookup(name).into_iter(), version_req).collect()
116    }
117
118    /// Returns the [ModuleDescriptor] for the newest module in the `ModuleCatalog`, that matches the
119    /// specified version requirement, or `None` if no module exists
120    ///
121    /// If `version_req == None`, this method should return the newest module available in the catalog
122    ///
123    /// NOTE: unversioned modules are considered to have the lowest possible version, and thus this method
124    ///   should only return an unversioned module if no matching modules are available
125    /// NOTE: Unversioned modules will never match any version_req, so this method should never return
126    /// any un-versioned ModuleDescriptors if `version_req.is_some()`
127    fn lookup_newest_with_version_req(&self, name: &str, version_req: Option<&semver::VersionReq>) -> Option<ModuleDescriptor> {
128        find_newest_module(self.lookup_with_version_req(name, version_req).into_iter())
129    }
130
131    /// Returns the [ModuleDescriptor] for the newest module in the `ModuleCatalog`, that matches the
132    /// specified name, uid, and version requirement, or `None` if no module exists
133    ///
134    /// See [ModuleCatalog::lookup_newest_with_version_req] for more details
135    fn lookup_newest_with_uid_and_version_req(&self, name: &str, uid: Option<u64>, version_req: Option<&semver::VersionReq>) -> Option<ModuleDescriptor> {
136        let result_iter = self.lookup_with_uid(name, uid).into_iter();
137        find_newest_module(filter_by_version_req(result_iter, version_req))
138    }
139
140    /// Returns a [ModuleLoader] for the specified module from the `ModuleCatalog`
141    fn get_loader(&self, descriptor: &ModuleDescriptor) -> Result<Box<dyn ModuleLoader>, String>;
142
143    /// Returns an iterator over every module available in the catalog.  May not be supported
144    /// by all catalog implementations
145    fn list<'a>(&'a self) -> Option<Box<dyn Iterator<Item=ModuleDescriptor> + 'a>> {
146        None
147    }
148
149    /// Returns an iterator over every unique module name in the catalog.  May not be supported
150    /// by all catalog implementations
151    fn list_names<'a>(&'a self) -> Option<Box<dyn Iterator<Item=String> + 'a>> {
152        self.list().map(|desc_iter| {
153            let mut names = HashSet::new();
154            for desc in desc_iter {
155                if !names.contains(desc.name()) {
156                    names.insert(desc.name().to_string());
157                }
158            }
159            Box::new(names.into_iter()) as Box<dyn Iterator<Item=String>>
160        })
161    }
162
163    /// Returns an iterator over every unique (module name, uid) pair in the catalog.  May not
164    /// be supported by all catalog implementations
165    fn list_name_uid_pairs<'a>(&'a self) -> Option<Box<dyn Iterator<Item=(String, Option<u64>)> + 'a>> {
166        self.list().map(|desc_iter| {
167            let mut results = HashSet::new();
168            for desc in desc_iter {
169                results.insert((desc.name().to_string(), desc.uid()));
170            }
171            Box::new(results.into_iter()) as Box<dyn Iterator<Item=(String, Option<u64>)>>
172        })
173    }
174
175    /// Returns the catalog as an [Any] in order to get back to the underlying object
176    fn as_any(&self) -> Option<&dyn Any> {
177        None
178    }
179
180    /// Synchronize the catalog's internal tables, so fresh upstream info is reflected
181    /// locally.  Does not fetch any modules
182    fn sync_toc(&self, _update_mode: UpdateMode) -> Result<(), String> {
183        Ok(())
184    }
185
186    /// Returns the catalog as a [ManagedCatalog] if the catalog supports active management
187    fn as_managed(&self) -> Option<&dyn ManagedCatalog> {
188        None
189    }
190}
191
192impl dyn ModuleCatalog {
193    /// Returns the catalog as as an underlying type, if it's supported by the catalog format
194    pub fn downcast<T: 'static>(&self) -> Option<&T> {
195        self.as_any()?.downcast_ref()
196    }
197}
198
199/// Internal function to filter a set of [ModuleDescriptor]s by a [semver::VersionReq].  See
200/// [ModuleCatalog::lookup_with_version_req] for an explanation of behavior
201fn filter_by_version_req<'a>(mods_iter: impl Iterator<Item=ModuleDescriptor> + 'a, version_req: Option<&'a semver::VersionReq>) -> Box<dyn Iterator<Item=ModuleDescriptor> + 'a> {
202    match version_req {
203        Some(req) => Box::new(mods_iter.filter(|desc| {
204            match desc.version() {
205                Some(ver) => req.matches(ver),
206                None => false
207            }
208        })),
209        None => Box::new(mods_iter)
210    }
211}
212
213/// Internal function to find the newest module in a set.  See [ModuleCatalog::lookup_newest_with_version_req]
214/// for an explanation of behavior
215pub(crate) fn find_newest_module(mods_iter: impl Iterator<Item=ModuleDescriptor>) -> Option<ModuleDescriptor> {
216    let mut highest_version: Option<semver::Version> = None;
217    let mut ret_desc = None;
218    for desc in mods_iter {
219        match desc.version().cloned() {
220            Some(ver) => {
221                match &mut highest_version {
222                    Some(highest_ver) => {
223                        if ver > *highest_ver {
224                            *highest_ver = ver;
225                            ret_desc = Some(desc);
226                        }
227                    },
228                    None => {
229                        ret_desc = Some(desc);
230                        highest_version = Some(ver)
231                    }
232                }
233            },
234            None => {
235                if highest_version.is_none() {
236                    if let Some(ret_desc) = ret_desc {
237                        log::warn!("Multiple un-versioned {} modules in catalog; impossible to select newest", ret_desc.name());
238                    }
239                    ret_desc = Some(desc)
240                }
241            }
242        }
243    }
244    ret_desc
245}
246
247/// The object responsible for locating and selecting dependency modules for each [MettaMod]
248///
249/// This structure is conceptually analogous to the a `Cargo.toml` file for a given module.
250#[derive(Clone, Debug, Default, Deserialize)]
251pub struct PkgInfo {
252
253    /// The public name of the module
254    ///
255    /// Should be composed of alpha-numeric characters with '-' and '_' characters allowed.  Must not
256    /// contain any other punctuation
257    pub name: Option<String>,
258
259    /// The version of this module
260    ///
261    /// A `None` or missing version is considered inferior to all other versions
262    #[serde(default)]
263    pub version: Option<semver::Version>,
264
265    /// If `strict == true` then a dependency must be declared in the `PkgInfo`, otherwise a permissive
266    /// version requirement will be assumed for any modules that are not explicitly declared
267    #[serde(default)]
268    pub strict: bool,
269
270    /// Requirements for each dependency sub-module
271    ///
272    /// A Duplicate entry for a given sub-module in the deps list is an error.
273    #[serde(default)]
274    pub deps: HashMap<String, DepEntry>,
275}
276
277/// A single entry in a [PkgInfo]'s dependencies, specifying the properties of a module that will satisfy a dependency
278#[derive(Clone, Debug, Default, Deserialize)]
279pub struct DepEntry {
280    /// Indicates that the dependency module should be loaded from a specific FS path
281    ///
282    /// If the fs_path is specified, the other pkg_info attributes will be ignored.
283    #[serde(default)]
284    pub fs_path: Option<PathBuf>,
285
286    #[serde(flatten)]
287    git_location: ModuleGitLocation,
288
289    /// An acceptable version of version bounds to satisfy the dependency.  None means any version
290    /// acceptable
291    #[serde(default)]
292    pub version_req: Option<semver::VersionReq>
293}
294
295impl PkgInfo {
296    /// Returns the version of the package
297    pub fn version(&self) -> Option<&semver::Version> {
298        self.version.as_ref()
299    }
300    /// Returns the version of the package as a [semver compliant](https://semver.org) string of bytes
301    pub fn version_bytes(&self) -> Result<Vec<u8>, String> {
302        match self.version() {
303            Some(ver) => Ok(format!("{ver}").into_bytes()),
304            None => Err("no version available".to_string())
305        }
306    }
307}
308
309/// Resolves which module to load from which available location or catalog, and returns the [ModuleLoader] to
310/// load that module
311pub(crate) fn resolve_module(pkg_info: Option<&PkgInfo>, context: &RunContext, name_path: &str) -> Result<Option<(Box<dyn ModuleLoader>, ModuleDescriptor)>, String> {
312    let mod_name = mod_name_from_path(name_path);
313
314    //Make sure the name is a legal module name
315    if !module_name_is_legal(mod_name) {
316        return Err(format!("Illegal module name: {mod_name}"));
317    }
318
319    //See if we have a pkg_info dep entry for the module
320    let mut version_req = None;
321    if let Some(entry) = pkg_info.as_ref().and_then(|pkg_info| pkg_info.deps.get(mod_name)) {
322
323        //If path is explicitly specified in the dep entry, then we must load the module at the
324        // specified path, and cannot search anywhere else
325        if let Some(path) = &entry.fs_path {
326            return loader_for_module_at_path(context.metta.environment().fs_mod_formats(), path, Some(mod_name), context.module().resource_dir());
327        }
328
329        //Get the module if it's specified with git keys
330        if entry.git_location.get_url().is_some() {
331            match context.metta.environment().specified_mods.as_ref() {
332                Some(specified_mods) => if let Some(pair) = specified_mods.loader_for_explicit_git_module(mod_name, UpdateMode::FetchIfMissing, &entry.git_location)? {
333                    return Ok(Some(pair));
334                },
335                None => return Err(format!("Unable to pull module \"{mod_name}\" from git; no local \"caches\" directory available"))
336            }
337        }
338
339        //If `version_req` is specified in the dep entry, then use it to constrain the catalog search
340        version_req = entry.version_req.as_ref();
341    } else {
342        //If the PkgInfo doesn't have an entry for the module and the PkgInfo is flagged as "strict"
343        // then we will not attempt to resolve the module any further, and the resolution will fail.
344        if let Some(pkg_info) = &pkg_info {
345            if pkg_info.strict {
346                return Ok(None);
347            }
348        }
349    }
350
351    //Search the module's resource dir before searching the environment's catalogs
352    // This allows a module to import another module inside its directory or as a peer of itself for
353    // single-file modules, without including an explicit PkgInfo dep entry.  On the other hand, If we
354    // want to require module authors to include a dep entry to be explicit about their dependencies, we
355    // can remove this catalog
356    let resource_dir_catalog;
357    let mut local_catalogs = vec![];
358    if let Some(mod_resource_dir) = context.module().resource_dir() {
359        if context.metta.environment().working_dir() != Some(mod_resource_dir) {
360            resource_dir_catalog = DirCatalog::new(PathBuf::from(mod_resource_dir), context.metta().environment().fs_mod_formats.clone());
361            local_catalogs.push(&resource_dir_catalog as &dyn ModuleCatalog);
362        }
363    }
364
365    //Search the catalogs, starting with the resource dir, and continuing to the runner's Environment
366    for catalog in local_catalogs.into_iter().chain(context.metta.environment().catalogs()) {
367        log::trace!("Looking for module: \"{mod_name}\" inside {catalog:?}");
368        match catalog.lookup_newest_with_version_req(mod_name, version_req) {
369            Some(descriptor) => {
370                log::info!("Found module: \"{mod_name}\" inside {:?}", catalog.display_name());
371                log::info!("Preparing to load module: \'{}\' as \'{}\'", descriptor.name, name_path);
372                return Ok(Some((catalog.get_loader(&descriptor)?, descriptor)))
373            },
374            None => {}
375        }
376    }
377
378    Ok(None)
379}
380
381/// Internal function to get a loader for a module at a specific file system path, by trying each FsModuleFormat in order
382pub(crate) fn loader_for_module_at_path<'a, P: AsRef<Path>, FmtIter: Iterator<Item=&'a dyn FsModuleFormat>>(fmts: FmtIter, path: P, name: Option<&str>, search_dir: Option<&Path>) -> Result<Option<(Box<dyn ModuleLoader>, ModuleDescriptor)>, String> {
383
384    //If the path is not an absolute path, assume it's relative to the running search_dir
385    let path = if path.as_ref().is_absolute() {
386        PathBuf::from(path.as_ref())
387    } else {
388        search_dir.ok_or_else(|| format!("Error loading {}.  Working directory or module resource dir required to load modules by relative path", path.as_ref().display()))?
389            .join(path)
390    };
391
392    //If a mod name was supplied, we want to make sure it's not a full name path
393    let name = match name {
394        Some(name) => Some(mod_name_from_path(name)),
395        None => None
396    };
397
398    //Check all module formats, to try and load the module at the path
399    for fmt in fmts {
400        if let Some((loader, descriptor)) = fmt.try_path(&path, name) {
401            return Ok(Some((loader, descriptor)))
402        }
403    }
404
405    Err(format!("No module format able to interpret module at {}", path.display()))
406}
407
408/// A loader for a MeTTa module that lives within a single `.metta` file
409#[derive(Debug)]
410pub(crate) struct SingleFileModule {
411    path: PathBuf,
412    pkg_info: PkgInfo,
413}
414
415impl SingleFileModule {
416    fn new(path: &Path, pkg_info: PkgInfo) -> Self {
417        Self {path: path.into(), pkg_info }
418    }
419    fn open_file(&self) -> Result<std::fs::File, String> {
420        std::fs::File::open(&self.path)
421            .map_err(|err| format!("Could not read file, path: {}, error: {}", self.path.display(), err))
422    }
423}
424
425impl ModuleLoader for SingleFileModule {
426    fn load(&self, context: &mut RunContext) -> Result<(), String> {
427
428        let space = GroundingSpace::new();
429        let resource_dir = self.path.parent().unwrap();
430        context.init_self_module(space.into(), Some(resource_dir.into()));
431
432        let parser = SExprParser::new(std::io::BufReader::new(self.open_file()?));
433        context.push_parser(Box::new(parser));
434
435        Ok(())
436    }
437    fn get_resource(&self, res_key: ResourceKey) -> Result<Resource, String> {
438        match res_key {
439            ResourceKey::MainMettaSrc => self.open_file().map(Into::<Resource>::into),
440            ResourceKey::Version => self.pkg_info.version_bytes().map(Into::<Resource>::into),
441            _ => Err("unsupported resource key".to_string())
442        }
443    }
444}
445
446/// A loader for a MeTTa module implemented as a directory
447///
448/// A `DirModule` can contain MeTTa code in a `module.metta` file, but any directory may
449/// be explicitly loaded as a module, making the directory contents available as resources.
450///
451/// See the "Anatomy of a Directory Module" section in the LP-TODO Finish writeup of user-level guide
452#[derive(Debug)]
453pub(crate) struct DirModule {
454    path: PathBuf,
455    pkg_info: PkgInfo,
456}
457
458impl DirModule {
459    fn new(path: &Path, pkg_info: PkgInfo) -> Self {
460        Self { path: path.into(), pkg_info }
461    }
462    fn open_file(&self) -> Result<std::fs::File, String> {
463        let module_metta_path = self.path.join("module.metta");
464        std::fs::File::open(module_metta_path)
465            .map_err(|err| format!("Could not read file, path: {}, error: {}", self.path.display(), err))
466    }
467}
468
469impl ModuleLoader for DirModule {
470    fn load(&self, context: &mut RunContext) -> Result<(), String> {
471
472        let space = GroundingSpace::new();
473        let resource_dir = &self.path;
474        context.init_self_module(space.into(), Some(resource_dir.into()));
475
476        // A module.metta file is optional.  Without one a dir module behaves as just
477        // a container for other resources and sub-modules.
478        if let Some(program_file) = self.open_file().ok() {
479            let parser = SExprParser::new(std::io::BufReader::new(program_file));
480            context.push_parser(Box::new(parser));
481        }
482
483        Ok(())
484    }
485    fn get_resource(&self, res_key: ResourceKey) -> Result<Resource, String> {
486        match res_key {
487            ResourceKey::MainMettaSrc => self.open_file()
488                .map_err(|_| format!("no module.metta file found in {} dir module", self.path.display()))
489                .map(Into::<Resource>::into),
490            ResourceKey::Version => self.pkg_info.version_bytes()
491                .map(Into::<Resource>::into),
492            _ => Err("unsupported resource key".to_string())
493        }
494    }
495}
496
497/// Implemented on a type to test if a given file-system path points to a MeTTa module, and to construct
498/// possible paths within a parent directory for a module of a certain name
499///
500/// Objects implementing this trait work with in conjunction with [DirCatalog] and [PkgInfo] to facilitate
501/// loading modules from include directories, specific paths, and remote `git` repositories.
502pub trait FsModuleFormat: std::fmt::Debug + Send + Sync {
503
504    /// Returns the possible paths inside a parent directory which may point to a module
505    ///
506    /// NOTE: This function is allowed to return paths that may not be valid.  Paths returned
507    /// from this method will be passed to [Self::try_path] to validate them.
508    fn paths_for_name(&self, parent_dir: &Path, mod_name: &str) -> Vec<PathBuf>;
509
510    /// Checks a specific path, and returns a [ModuleLoader] and a [ModuleDescriptor] if a
511    /// supported module resides at the path
512    ///
513    /// This method should return `None` if the path does not point to a valid module in the
514    /// implemented format.
515    fn try_path(&self, path: &Path, mod_name: Option<&str>) -> Option<(Box<dyn ModuleLoader>, ModuleDescriptor)>;
516}
517
/// Format ID passed to the descriptor of a single-file module.  An arbitrary number, picked
/// to be unlikely to collide with the ID chosen by another FsModuleFormat
const SINGLE_FILE_MOD_FMT_ID: u64 = u64::MAX - 5_000;

/// Format ID passed to the descriptor of a directory module.  An arbitrary number, picked
/// to be unlikely to collide with the ID chosen by another FsModuleFormat
const DIR_MOD_FMT_ID: u64 = u64::MAX - 5_001;
523
524/// An object to identify and load a single-file module (naked .metta files)
525#[derive(Debug)]
526pub struct SingleFileModuleFmt;
527
528impl FsModuleFormat for SingleFileModuleFmt {
529    fn paths_for_name(&self, parent_dir: &Path, mod_name: &str) -> Vec<PathBuf> {
530        let base_path = parent_dir.join(mod_name);
531        let extended_path = push_extension(&base_path, ".metta");
532        vec![base_path, extended_path]
533    }
534    fn try_path(&self, path: &Path, mod_name: Option<&str>) -> Option<(Box<dyn ModuleLoader>, ModuleDescriptor)> {
535        if path.is_file() {
536            let mod_name = match mod_name {
537                Some(mod_name) => mod_name,
538                None => path.file_stem().unwrap().to_str().unwrap(), //LP-TODO-NEXT: Unify the code to extract the mod-name from the file name between here and DirModuleFmt::try_path
539            };
540
541            //TODO: parse out the module version here, and pass it to new_with_path_and_fmt_id below
542            //In a single-file module, the discriptor information will be embedded within the MeTTa code
543            // Therefore, we need to parse the whole text of the module looking for a `_pkg-info` atom,
544            // that we can then convert into a PkgInfo structure
545            let pkg_info = PkgInfo::default();
546
547            let descriptor = ModuleDescriptor::new_with_path_and_fmt_id(mod_name.to_string(), None, path, SINGLE_FILE_MOD_FMT_ID);
548            let loader = Box::new(SingleFileModule::new(path, pkg_info));
549            Some((loader, descriptor))
550        } else {
551            None
552        }
553    }
554}
555
556/// An object to identify and load a MeTTa module implemented as a directory
557#[derive(Debug)]
558pub struct DirModuleFmt;
559
560impl FsModuleFormat for DirModuleFmt {
561    fn paths_for_name(&self, parent_dir: &Path, mod_name: &str) -> Vec<PathBuf> {
562        let path = parent_dir.join(mod_name);
563        vec![path]
564    }
565    fn try_path(&self, path: &Path, mod_name: Option<&str>) -> Option<(Box<dyn ModuleLoader>, ModuleDescriptor)> {
566        if path.is_dir() {
567
568            //First see if we can extract a [PkgInfo] from a `pkg-info.json` file
569            let mut pkg_info: Option<PkgInfo> = None;
570            let pkginfo_json_path = path.join("pkg-info.json");
571            if pkginfo_json_path.exists() {
572                let file_contents = std::fs::read_to_string(&pkginfo_json_path).unwrap();
573                pkg_info = Some(serde_json::from_str(&file_contents).unwrap());
574            }
575
576            //TODO: Also check for a `pkg-info.metta` file, as soon as I have implemented Atom-Serde
577            // Also try and parse a `_pkg-info` atom from the `module.metta` file if it's not in a dedicated file
578
579            let pkg_info = pkg_info.unwrap_or_else(|| PkgInfo::default());
580
581            //Get the module name, first use the name provided.  If none, then use the name from the
582            // pkg-info, and if that's also none, construct a module name from the file name
583            let full_path;
584            let mod_name = match mod_name {
585                Some(mod_name) => mod_name,
586                None => {
587                    match &pkg_info.name {
588                        Some(name) => name,
589                        None => {
590                            //LP-TODO-Next: I need to gracefully create a legal module name from the file name
591                            // if the file name happens to contain characters that are illegal in a module name
592                            full_path = path.canonicalize().unwrap();
593                            full_path.file_stem().unwrap().to_str().unwrap()
594                        }
595                    }
596                },
597            };
598
599            let version = pkg_info.version.clone();
600            let descriptor = ModuleDescriptor::new_with_path_and_fmt_id(mod_name.to_string(), version, path, DIR_MOD_FMT_ID);
601            let loader = Box::new(DirModule::new(path, pkg_info));
602            return Some((loader, descriptor));
603        }
604        None
605    }
606}
607
608/// Implements ModuleCatalog to load MeTTa modules from a file-system directory trying a number of
609/// [FsModuleFormat] formats in succession
610#[derive(Debug)]
611pub struct DirCatalog {
612    path: PathBuf,
613    fmts: Arc<Vec<Box<dyn FsModuleFormat>>>,
614}
615
616impl DirCatalog {
617    /// Internal function to initialize a DirCatalog for a directory in the module search path
618    pub(crate) fn new(path: PathBuf, fmts: Arc<Vec<Box<dyn FsModuleFormat>>>) -> Self {
619        Self {path, fmts}
620    }
621}
622
623impl ModuleCatalog for DirCatalog {
624    fn display_name(&self) -> String {
625        format!("Dir \"{}\"", self.path.display())
626    }
627    fn lookup(&self, name: &str) -> Vec<ModuleDescriptor> {
628
629        //QUESTION: How should we handle modules with an internal "package-name" that differs from their
630        // name in the file system?
631        //
632        //Cargo treats the module's internal "package-name" as authoritative, but that means it's impossible
633        // to "install" a module simply by dropping it into a directory in the search path, because there
634        // needs to be an index of all available modules in that directory.
635        //
636        //For us, I think we want a less formal approach akin to Python's, where we are allowed to drop a
637        // module into a directory, and import it with a naked `import` statement (i.e. no pkg-info entry)
638        // but for that to work, we need to stipulate that it's possible to infer a file name from a
639        // module name.
640        //
641        //NOTE: This is not a limitation across all catalogs, just the `DirCatalog`  If a catalog is able
642        // to maintain its own index of module names, it can store the modules any way it wants to.
643        //
644
645        let mut found_modules = vec![];
646
647        //Inspect the directory using each FsModuleFormat, in order
648        visit_modules_in_dir_using_mod_formats(&self.fmts, &self.path, name, |_loader, descriptor| {
649            found_modules.push(descriptor);
650            true
651        });
652
653        found_modules
654    }
655    fn get_loader(&self, descriptor: &ModuleDescriptor) -> Result<Box<dyn ModuleLoader>, String> {
656
657        let mut matching_module = None;
658        visit_modules_in_dir_using_mod_formats(&self.fmts, &self.path, &descriptor.name, |loader, resolved_descriptor| {
659            if &resolved_descriptor == descriptor {
660                matching_module = Some(loader);
661                true
662            } else {
663                false
664            }
665        });
666
667        match matching_module {
668            Some(loader) => Ok(loader),
669            None => Err(format!("Failed to load module {} in directory {}", &descriptor.name, self.path.display()))
670        }
671    }
672}
673
/// Internal Utility Function.  Returns a new path made from `path` with `extension` appended
/// verbatim to the end.  Nothing is replaced or inserted, so any extension `path` already has is
/// kept, and a separator such as `.` must be part of `extension` if one is wanted
fn push_extension(path: &Path, extension: impl AsRef<OsStr>) -> PathBuf {
    let mut joined = OsString::from(path.as_os_str());
    joined.push(extension);
    PathBuf::from(joined)
}
681
682/// Internal function to try FsModuleFormat formats in order.  If the closure returns `true` this function
683/// will exit, otherwise it will try every path returned by every format
684fn visit_modules_in_dir_using_mod_formats(fmts: &[Box<dyn FsModuleFormat>], dir_path: &Path, mod_name: &str, mut f: impl FnMut(Box<dyn ModuleLoader>, ModuleDescriptor) -> bool) {
685
686    for fmt in fmts {
687        for path in fmt.paths_for_name(dir_path, mod_name) {
688            if let Some((loader, descriptor)) = fmt.try_path(&path, Some(mod_name)) {
689                if f(loader, descriptor) {
690                    return;
691                }
692            }
693        }
694    }
695}
696
/// A data structure that uniquely identifies an exact instance of a module
///
/// If two modules have the same ModuleDescriptor, they are considered to be the same module
///
/// The uid field encodes particulars about a module so it will never be mistaken for another copy
/// or variation of the module even if the version field is the same.  For example, a module loaded
/// from the file system will use the uid to hash the path, while a module fetched from git will
/// hash the url and branch.
///
/// Equality and hashing are derived, so all three fields take part in deciding whether two
/// descriptors are the same.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
pub struct ModuleDescriptor {
    /// The name of the module
    name: String,
    /// An optional id that tells this module apart from other copies or variations that share
    /// its name and version
    uid: Option<u64>,
    /// The version of the module, if it has one
    version: Option<semver::Version>,
}
711
712impl core::fmt::Display for ModuleDescriptor {
713    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
714        write!(f, "{}", self.name)?;
715        if let Some(version) = &self.version {
716            write!(f, " @{version}")?;
717        }
718        if let Some(uid) = self.uid {
719            write!(f, " #{uid:016x}")?;
720        }
721        Ok(())
722    }
723}
724
725impl ModuleDescriptor {
726    /// Create a new ModuleDescriptor
727    pub fn new(name: String, version: Option<semver::Version>, uid: Option<u64>) -> Self {
728        Self { name, uid, version }
729    }
730    /// Returns a new ModuleDescriptor by computing a stable hash of the `ident` bytes, and using the `fmt_id`
731    pub fn new_with_ident_bytes_and_fmt_id(name: String, version: Option<semver::Version>, ident: &[u8], fmt_id: u64) -> Self {
732        let uid = Self::uid_from_ident_bytes_and_fmt_id(ident, fmt_id);
733        ModuleDescriptor::new(name, version, Some(uid))
734    }
735    /// Create a new ModuleDescriptor using a file system path and another unique id
736    ///
737    /// The descriptor's uid is based on a stable-hash of the path, because a module loaded by
738    /// path shouldn't be substituted for any other module unless it's from the same path.
739    ///
740    /// The purpose of the `fmt_id` is to ensure two different formats or catalogs don't generate
741    /// the same ModuleDescriptor, but you can pass 0 if it doesn't matter
742    pub fn new_with_path_and_fmt_id(name: String, version: Option<semver::Version>, path: &Path, fmt_id: u64) -> Self {
743        Self::new_with_ident_bytes_and_fmt_id(name, version, path.as_os_str().as_encoded_bytes(), fmt_id)
744    }
745    /// Returns the name of the module represented by the ModuleDescriptor
746    pub fn name(&self) -> &str {
747        &self.name
748    }
749    /// Returns the uid associated with the ModuleDescriptor
750    pub fn uid(&self) -> Option<u64> {
751        self.uid
752    }
753    /// Returns the version of the module represented by the ModuleDescriptor
754    pub fn version(&self) -> Option<&semver::Version> {
755        self.version.as_ref()
756    }
757    /// Internal.  Use the Hash trait to get a uid for the whole ModuleDescriptor
758    pub fn hash(&self) -> u64 {
759        let mut hasher = DefaultHasher::new();
760        std::hash::Hash::hash(self, &mut hasher);
761        hasher.finish()
762    }
763    /// Returns a uid based on a stable hash of `the ident` bytes, and the fmt_id
764    pub fn uid_from_ident_bytes_and_fmt_id(ident: &[u8], fmt_id: u64) -> u64 {
765        xxh3_64(ident) ^ fmt_id
766    }
767}
768
769/// Extracts the module name from a `.git` URL
770///
771/// For example, `https://github.com/trueagi-io/hyperon-experimental.git` would be parsed
772/// into "hyperon-experimental".  Returns None if the form of the URL isn't recognized
773pub fn mod_name_from_url(url: &str) -> Option<String> {
774    let without_ending = url.trim_end_matches("/")
775        .trim_end_matches(".git");
776    let without_mod_name = without_ending.trim_end_matches(|c| c != '/');
777    let mod_name = &without_ending[without_mod_name.len()..];
778    module_name_make_legal(mod_name)
779}
780
781//-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-
782// TESTS
783//-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-
784
#[cfg(test)]
mod tests {
    use super::*;

    /// Bogus test catalog that returns a fake module in response to any query for a name that is
    /// a single capital letter.  Used by `recursive_submodule_import_test`
    ///
    /// The same type also acts as the loader for the fake modules it reports
    #[derive(Debug)]
    struct TestCatalog;

    impl ModuleCatalog for TestCatalog {
        /// Reports one descriptor, with no version and no uid, when `name` is a single uppercase
        /// character.  Reports nothing for any other name
        fn lookup(&self, name: &str) -> Vec<ModuleDescriptor> {
            if name.len() == 1 && name.chars().last().unwrap().is_uppercase() {
                vec![ModuleDescriptor::new(name.to_string(), None, None)]
            } else {
                vec![]
            }
        }
        /// Hands back a `TestCatalog` as the loader, regardless of the descriptor
        fn get_loader(&self, _descriptor: &ModuleDescriptor) -> Result<Box<dyn ModuleLoader>, String> {
            Ok(Box::new(TestCatalog))
        }
    }

    impl ModuleLoader for TestCatalog {
        /// Initializes the module being loaded with a new `GroundingSpace`
        fn load(&self, context: &mut RunContext) -> Result<(), String> {
            let space = GroundingSpace::new();
            context.init_self_module(space.into(), None);
            Ok(())
        }
    }

    /// This tests the core recursive sub-module loading code
    #[test]
    fn recursive_submodule_import_test() {

        //Make a new runner with the TestCatalog
        let runner = Metta::new(Some(EnvBuilder::test_env().push_module_catalog(TestCatalog)));

        //Now try loading an inner-module, and make sure it can recursively load all the needed parents
        let result = runner.run(SExprParser::new("!(import! &self A:B:C)"));
        assert_eq!(result, Ok(vec![vec![expr!()]]));

        //Test that each parent sub-module is indeed loaded
        assert!(runner.get_module_by_name("A").is_ok());
        assert!(runner.get_module_by_name("A:B").is_ok());
        assert!(runner.get_module_by_name("A:B:C").is_ok());

        //Test that we fail to load a module with an invalid parent, even if the module itself resolves.
        // The lowercase `a` is a name the TestCatalog won't report, while `B` is one it will
        let _result = runner.run(SExprParser::new("!(import! &self a:B)"));
        assert!(runner.get_module_by_name("a:B").is_err());
    }

    //
    //LP-TODO-NEXT, Next make sure the catalogs are able to do the recursive loading from the file system,
    // using their working dirs.  Maybe make this second test a C API test to get better coverage
    //

    //LP-TODO-NEXT, Add a test for loading a module from a DirCatalog by passing a name with an extension (i.e. `my_mod.metta`) to `resolve`,
    // and make sure the loaded module that comes back doesn't have the extension

    /// Test loader for a module named "test-mod", whose [PkgInfo] declares a dependency that is
    /// to be fetched from git.  Used by `git_pkginfo_fetch_test`
    #[derive(Debug)]
    struct TestLoader {
        pkg_info: PkgInfo,
    }

    impl TestLoader {
        /// Builds the loader, along with the [PkgInfo] that declares the git dependency
        fn new() -> Self {
            let mut pkg_info = PkgInfo::default();

            //Set up the module [PkgInfo] so it knows to load a sub-module from git
            pkg_info.name = Some("test-mod".to_string());
            pkg_info.deps.insert("metta-morph-test".to_string(), DepEntry{
                fs_path: None,
                git_location: ModuleGitLocation {
                    //TODO: We probably want a smaller test repo
                    git_url: Some("https://github.com/trueagi-io/metta-morph/".to_string()),
                    git_branch: None, //Some("Hyperpose".to_string()),
                    git_subdir: None,
                    git_main_file: Some(PathBuf::from("mettamorph.metta")),
                    local_path: None,
                },
                version_req: None,
            });
            Self { pkg_info }
        }
    }

    impl ModuleLoader for TestLoader {
        /// Initializes the module being loaded with a new `GroundingSpace`
        fn load(&self, context: &mut RunContext) -> Result<(), String> {
            let space = GroundingSpace::new();
            context.init_self_module(space.into(), None);

            Ok(())
        }
        /// Exposes the [PkgInfo] that carries the git dependency entry
        fn pkg_info(&self) -> Option<&PkgInfo> {
            Some(&self.pkg_info)
        }
    }

    /// Tests that a module can be fetched from git and loaded, when the git URL is specified in
    /// the module's PkgInfo.  This test requires a network connection
    ///
    /// NOTE.  Ignored because we may not want it fetching from the internet when running the
    /// test suite.  Invoke `cargo test --features git git_pkginfo_fetch_test -- --ignored --nocapture` to run it.
    #[ignore]
    #[test]
    fn git_pkginfo_fetch_test() {

        //Make a new runner, with the config dir in `/tmp/hyperon-test/`
        let runner = Metta::new(Some(EnvBuilder::new().set_config_dir(Path::new("/tmp/hyperon-test/"))));
        let _mod_id = runner.load_module_direct(Box::new(TestLoader::new()), "test-mod").unwrap();

        //Import the dependency that the PkgInfo of "test-mod" declares
        let result = runner.run(SExprParser::new("!(import! &self test-mod:metta-morph-test)"));
        assert_eq!(result, Ok(vec![vec![expr!()]]));

        //Test that we can use a function imported from the module
        let result = runner.run(SExprParser::new("!(sequential (A B))"));
        assert_eq!(result, Ok(vec![vec![sym!("A"), sym!("B")]]));

        runner.display_loaded_modules();
    }

    /// Tests that a module can be resolved in a remote catalog, fetched from git and then
    /// loaded.  This test requires a network connection
    ///
    /// NOTE.  Ignored because we may not want it fetching from the internet when running the
    /// test suite.  Invoke `cargo test --features git git_remote_catalog_fetch_test -- --ignored --nocapture` to run it.
    #[ignore]
    #[test]
    fn git_remote_catalog_fetch_test() {

        //Make a new runner, with the config dir in `/tmp/hyperon-test/`
        let runner = Metta::new(Some(EnvBuilder::new().set_config_dir(Path::new("/tmp/hyperon-test/"))));
        let result = runner.run(SExprParser::new("!(import! &self metta-morph)"));
        assert_eq!(result, Ok(vec![vec![expr!()]]));

        //Test that we can use a function imported from the module
        let result = runner.run(SExprParser::new("!(sequential (A B))"));
        assert_eq!(result, Ok(vec![vec![sym!("A"), sym!("B")]]));

        runner.display_loaded_modules();
    }
}