hyperon/metta/runner/pkg_mgmt/catalog.rs
1//!
2//! # Module Resolution
3//!
4//! ## Behavior of Module Resolution
5//!
6//! ```text
7//! ┌────────────────────┐ ⎽⎼⎻⎺ ⎺⎺⎺ ⎺⎻⎼⎽ ⎽⎼⎻⎺ ⎺⎺⎺ ⎺⎻⎼⎽
8//! ╱ ╱ ⎽⎼⎻⎺ pkg-info in ⎺⎻⎼⎽ Yes ⎽⎼⎻⎺pkg-info entry ⎺⎻⎼⎽ No
9//! ╱ (import!) ╱─────►< &self has entry for >─────►< has fs_path attrib? >───┐
10//! ╱ ╱ ⎺⎻⎼⎽ module? ⎽⎼⎻⎺ ⎺⎻⎼⎽ ⎽⎼⎻⎺ │
11//! └────────────────────┘ ⎺⎻⎼⎽ ⎽⎽⎽ ⎽⎼⎻⎺ ⎺⎻⎼⎽ ⎽⎽⎽ ⎽⎼⎻⎺ │
12//! │ No │ Yes │
13//! ┌─────────────────────────┐ ┌───────────▼─────────────┐ /───────────▼─────────────\ │
14//! │ Query ModuleCatalogs │ │ Assume any module │ │ Load the module at │ │
15//! │ in order, with │◄──┬─┤ version will satisfy │ │ the file-system path │ │
16//! │ version requirement │ │ │ dependency │ │ with first successful │ │
17//! │ │ │ │ │ │ FsModuleFormat │ │
18//! └───────────┬─────────────┘ │ └───────────▲─────────────┘ \───────────▲─────────────/ │
19//! │ │ │ │ │
20//! /───────────▼─────────────\ │ │ ┌───────────┴─────────────┐ │
21//! │ Load the module from │ │ │ │ clone module from │ │
22//! │ the first catalog │ │ │ │ remote repo to │ │
23//! │ that reports a │ │ │ │ local resource dir │ │
24//! │ successful match │ │ │ │ │ │
25//! \─────────────────────────/ │ │ └───────────▲─────────────┘ │
26//! │ │ No │ Yes │
27//! │Yes ⎽⎼⎻⎺ ⎺⎺⎺ ⎺⎻⎼⎽ ⎽⎼⎻⎺ ⎺⎺⎺ ⎺⎻⎼⎽ │
28//! │ ⎽⎼⎻⎺pkg-info entry ⎺⎻⎼⎽ No ⎽⎼⎻⎺pkg-info entry ⎺⎻⎼⎽ │
29//! └< has version attrib? >◄─────< has git attrib? >───┘
30//! ⎺⎻⎼⎽ ⎽⎼⎻⎺ ⎺⎻⎼⎽ ⎽⎼⎻⎺
31//! ⎺⎻⎼⎽ ⎽⎽⎽ ⎽⎼⎻⎺ ⎺⎻⎼⎽ ⎽⎽⎽ ⎽⎼⎻⎺
32//! ```
33//!
34
35//LP-TODO-NEXT make a test to make sure circular imports are caught and don't lead to infinite recursion
//QUESTION: Should circular imports between modules be allowed? The current implementation (and possibly
// the MeTTa language itself) disallows circular imports because there is no concept of forward declaration.
// It *may* be possible to pre-parse the language in order to make recursive imports possible, but I have
// not yet thought in detail about this.
40//
41
42//QUESTION on shared base dependencies & sat-set solving:
43//The currently implemented design resolves each module's dependencies in a straightforward depth-first
44// order. This is possible because the module system allows multiple instances of the same module to
45// be loaded simultaneously. So each module can pick its best dependencies based on its pkg-info and
46// the available catalogs.
47//However, consider the following situation:
48// ModA depends on ModI for some interface types (for example a special String type)
49// ModB depends on ModI for the same interface types, but ModA and ModB don't know about each other
50// ModTop depends on both ModA and ModB, and uses functionality in ModA to create some objects that
51// it expects ModB to be able to use. Therefore the system must guarantee the same version of ModI
52// gets imported by both ModA and ModB.
53//This is precisely the opposite behavior from the ability of a module to carry around "private"
54// dependencies and know that those dependencies will always be loaded, and they won't be substituted
55// for another version.
56//
57//I see several possible solutions:
58// 1.) We could disallow private dependencies altogether. This is the approach taken by Cargo.
59// However this contravenes some of the desiderata outlined in this issue:
60// https://github.com/trueagi-io/hyperon-experimental/issues/470
// 2.) We could require explicit re-exporting of a dependency module used in a module's interface, which
// would give the implementation an opportunity to find dependency module versions that work for
// all other modules that use them in common, i.e. solve for the sat set. Optionally, with this approach,
// the module could also opt to re-export a private dependency as part of itself, making the interface
// between ModA and ModB in the example deliberately incompatible.
66// 3.) We could require private dependencies to be explicitly specified as private in the pkg-info. With
67// the default behavior being a single module for each module name. This might be a reasonable compromise
68// between 1 & 2, however we would likely need some form of linting, so that a user doesn't shoot
69// themselves in the foot by exporting an interface that includes items from a private dependency
70//
71// I think my personal preference is for #2.
72
73use core::any::Any;
74use std::path::Path;
75use std::collections::hash_map::DefaultHasher;
76use std::hash::Hasher;
77use std::ffi::{OsStr, OsString};
78use std::collections::HashSet;
79
80use crate::metta::runner::modules::*;
81use crate::metta::runner::{*, git_catalog::*};
82
83use xxhash_rust::xxh3::xxh3_64;
84use serde::{Deserialize, Serialize};
85
86/// Implemented for types capable of locating MeTTa modules
87///
88/// For example, `ModuleCatalog` would be an interface to a module respository, analogous to `PyPI` or
89/// `crates.io` but `ModuleCatalog` is also implemented for [Path] because any file system directory may be
90/// capable of storing and indexing MeTTa modules.
91///
92/// `ModuleCatalog` types are closely connected with [ModuleLoader] types because the `ModuleCatalog` must
93/// recognize the module in whatever media it exists, and supply the `ModuleLoader` to load that module
94pub trait ModuleCatalog: std::fmt::Debug + Send + Sync {
95 /// The name of the catalog, to be displayed to the user
96 fn display_name(&self) -> String {
97 std::any::type_name::<Self>().to_string()
98 }
99
100 /// Returns the [ModuleDescriptor] for every module in the `ModuleCatalog` with the specified name
101 fn lookup(&self, name: &str) -> Vec<ModuleDescriptor>;
102
103 /// Returns the [ModuleDescriptor] for every module in the `ModuleCatalog` with the specified name,
104 /// and uid match
105 fn lookup_with_uid(&self, name: &str, uid: Option<u64>) -> Vec<ModuleDescriptor> {
106 self.lookup(name).into_iter().filter(|desc| desc.uid == uid).collect()
107 }
108
109 /// Returns the [ModuleDescriptor] for every module in the `ModuleCatalog` with the specified name
110 /// matching the version requirements
111 ///
112 /// NOTE: Unversioned modules will never match any version_req, so this method should never return
113 /// any un-versioned ModuleDescriptors if `version_req.is_some()`
114 fn lookup_with_version_req(&self, name: &str, version_req: Option<&semver::VersionReq>) -> Vec<ModuleDescriptor> {
115 filter_by_version_req(self.lookup(name).into_iter(), version_req).collect()
116 }
117
118 /// Returns the [ModuleDescriptor] for the newest module in the `ModuleCatalog`, that matches the
119 /// specified version requirement, or `None` if no module exists
120 ///
121 /// If `version_req == None`, this method should return the newest module available in the catalog
122 ///
123 /// NOTE: unversioned modules are considered to have the lowest possible version, and thus this method
124 /// should only return an unversioned module if no matching modules are available
125 /// NOTE: Unversioned modules will never match any version_req, so this method should never return
126 /// any un-versioned ModuleDescriptors if `version_req.is_some()`
127 fn lookup_newest_with_version_req(&self, name: &str, version_req: Option<&semver::VersionReq>) -> Option<ModuleDescriptor> {
128 find_newest_module(self.lookup_with_version_req(name, version_req).into_iter())
129 }
130
131 /// Returns the [ModuleDescriptor] for the newest module in the `ModuleCatalog`, that matches the
132 /// specified name, uid, and version requirement, or `None` if no module exists
133 ///
134 /// See [ModuleCatalog::lookup_newest_with_version_req] for more details
135 fn lookup_newest_with_uid_and_version_req(&self, name: &str, uid: Option<u64>, version_req: Option<&semver::VersionReq>) -> Option<ModuleDescriptor> {
136 let result_iter = self.lookup_with_uid(name, uid).into_iter();
137 find_newest_module(filter_by_version_req(result_iter, version_req))
138 }
139
140 /// Returns a [ModuleLoader] for the specified module from the `ModuleCatalog`
141 fn get_loader(&self, descriptor: &ModuleDescriptor) -> Result<Box<dyn ModuleLoader>, String>;
142
143 /// Returns an iterator over every module available in the catalog. May not be supported
144 /// by all catalog implementations
145 fn list<'a>(&'a self) -> Option<Box<dyn Iterator<Item=ModuleDescriptor> + 'a>> {
146 None
147 }
148
149 /// Returns an iterator over every unique module name in the catalog. May not be supported
150 /// by all catalog implementations
151 fn list_names<'a>(&'a self) -> Option<Box<dyn Iterator<Item=String> + 'a>> {
152 self.list().map(|desc_iter| {
153 let mut names = HashSet::new();
154 for desc in desc_iter {
155 if !names.contains(desc.name()) {
156 names.insert(desc.name().to_string());
157 }
158 }
159 Box::new(names.into_iter()) as Box<dyn Iterator<Item=String>>
160 })
161 }
162
163 /// Returns an iterator over every unique (module name, uid) pair in the catalog. May not
164 /// be supported by all catalog implementations
165 fn list_name_uid_pairs<'a>(&'a self) -> Option<Box<dyn Iterator<Item=(String, Option<u64>)> + 'a>> {
166 self.list().map(|desc_iter| {
167 let mut results = HashSet::new();
168 for desc in desc_iter {
169 results.insert((desc.name().to_string(), desc.uid()));
170 }
171 Box::new(results.into_iter()) as Box<dyn Iterator<Item=(String, Option<u64>)>>
172 })
173 }
174
175 /// Returns the catalog as an [Any] in order to get back to the underlying object
176 fn as_any(&self) -> Option<&dyn Any> {
177 None
178 }
179
180 /// Synchronize the catalog's internal tables, so fresh upstream info is reflected
181 /// locally. Does not fetch any modules
182 fn sync_toc(&self, _update_mode: UpdateMode) -> Result<(), String> {
183 Ok(())
184 }
185
186 /// Returns the catalog as a [ManagedCatalog] if the catalog supports active management
187 fn as_managed(&self) -> Option<&dyn ManagedCatalog> {
188 None
189 }
190}
191
192impl dyn ModuleCatalog {
193 /// Returns the catalog as as an underlying type, if it's supported by the catalog format
194 pub fn downcast<T: 'static>(&self) -> Option<&T> {
195 self.as_any()?.downcast_ref()
196 }
197}
198
199/// Internal function to filter a set of [ModuleDescriptor]s by a [semver::VersionReq]. See
200/// [ModuleCatalog::lookup_with_version_req] for an explanation of behavior
201fn filter_by_version_req<'a>(mods_iter: impl Iterator<Item=ModuleDescriptor> + 'a, version_req: Option<&'a semver::VersionReq>) -> Box<dyn Iterator<Item=ModuleDescriptor> + 'a> {
202 match version_req {
203 Some(req) => Box::new(mods_iter.filter(|desc| {
204 match desc.version() {
205 Some(ver) => req.matches(ver),
206 None => false
207 }
208 })),
209 None => Box::new(mods_iter)
210 }
211}
212
213/// Internal function to find the newest module in a set. See [ModuleCatalog::lookup_newest_with_version_req]
214/// for an explanation of behavior
215pub(crate) fn find_newest_module(mods_iter: impl Iterator<Item=ModuleDescriptor>) -> Option<ModuleDescriptor> {
216 let mut highest_version: Option<semver::Version> = None;
217 let mut ret_desc = None;
218 for desc in mods_iter {
219 match desc.version().cloned() {
220 Some(ver) => {
221 match &mut highest_version {
222 Some(highest_ver) => {
223 if ver > *highest_ver {
224 *highest_ver = ver;
225 ret_desc = Some(desc);
226 }
227 },
228 None => {
229 ret_desc = Some(desc);
230 highest_version = Some(ver)
231 }
232 }
233 },
234 None => {
235 if highest_version.is_none() {
236 if let Some(ret_desc) = ret_desc {
237 log::warn!("Multiple un-versioned {} modules in catalog; impossible to select newest", ret_desc.name());
238 }
239 ret_desc = Some(desc)
240 }
241 }
242 }
243 }
244 ret_desc
245}
246
247/// The object responsible for locating and selecting dependency modules for each [MettaMod]
248///
249/// This structure is conceptually analogous to the a `Cargo.toml` file for a given module.
250#[derive(Clone, Debug, Default, Deserialize)]
251pub struct PkgInfo {
252
253 /// The public name of the module
254 ///
255 /// Should be composed of alpha-numeric characters with '-' and '_' characters allowed. Must not
256 /// contain any other punctuation
257 pub name: Option<String>,
258
259 /// The version of this module
260 ///
261 /// A `None` or missing version is considered inferior to all other versions
262 #[serde(default)]
263 pub version: Option<semver::Version>,
264
265 /// If `strict == true` then a dependency must be declared in the `PkgInfo`, otherwise a permissive
266 /// version requirement will be assumed for any modules that are not explicitly declared
267 #[serde(default)]
268 pub strict: bool,
269
270 /// Requirements for each dependency sub-module
271 ///
272 /// A Duplicate entry for a given sub-module in the deps list is an error.
273 #[serde(default)]
274 pub deps: HashMap<String, DepEntry>,
275}
276
277/// A single entry in a [PkgInfo]'s dependencies, specifying the properties of a module that will satisfy a dependency
278#[derive(Clone, Debug, Default, Deserialize)]
279pub struct DepEntry {
280 /// Indicates that the dependency module should be loaded from a specific FS path
281 ///
282 /// If the fs_path is specified, the other pkg_info attributes will be ignored.
283 #[serde(default)]
284 pub fs_path: Option<PathBuf>,
285
286 #[serde(flatten)]
287 git_location: ModuleGitLocation,
288
289 /// An acceptable version of version bounds to satisfy the dependency. None means any version
290 /// acceptable
291 #[serde(default)]
292 pub version_req: Option<semver::VersionReq>
293}
294
295impl PkgInfo {
296 /// Returns the version of the package
297 pub fn version(&self) -> Option<&semver::Version> {
298 self.version.as_ref()
299 }
300 /// Returns the version of the package as a [semver compliant](https://semver.org) string of bytes
301 pub fn version_bytes(&self) -> Result<Vec<u8>, String> {
302 match self.version() {
303 Some(ver) => Ok(format!("{ver}").into_bytes()),
304 None => Err("no version available".to_string())
305 }
306 }
307}
308
309/// Resolves which module to load from which available location or catalog, and returns the [ModuleLoader] to
310/// load that module
311pub(crate) fn resolve_module(pkg_info: Option<&PkgInfo>, context: &RunContext, name_path: &str) -> Result<Option<(Box<dyn ModuleLoader>, ModuleDescriptor)>, String> {
312 let mod_name = mod_name_from_path(name_path);
313
314 //Make sure the name is a legal module name
315 if !module_name_is_legal(mod_name) {
316 return Err(format!("Illegal module name: {mod_name}"));
317 }
318
319 //See if we have a pkg_info dep entry for the module
320 let mut version_req = None;
321 if let Some(entry) = pkg_info.as_ref().and_then(|pkg_info| pkg_info.deps.get(mod_name)) {
322
323 //If path is explicitly specified in the dep entry, then we must load the module at the
324 // specified path, and cannot search anywhere else
325 if let Some(path) = &entry.fs_path {
326 return loader_for_module_at_path(context.metta.environment().fs_mod_formats(), path, Some(mod_name), context.module().resource_dir());
327 }
328
329 //Get the module if it's specified with git keys
330 if entry.git_location.get_url().is_some() {
331 match context.metta.environment().specified_mods.as_ref() {
332 Some(specified_mods) => if let Some(pair) = specified_mods.loader_for_explicit_git_module(mod_name, UpdateMode::FetchIfMissing, &entry.git_location)? {
333 return Ok(Some(pair));
334 },
335 None => return Err(format!("Unable to pull module \"{mod_name}\" from git; no local \"caches\" directory available"))
336 }
337 }
338
339 //If `version_req` is specified in the dep entry, then use it to constrain the catalog search
340 version_req = entry.version_req.as_ref();
341 } else {
342 //If the PkgInfo doesn't have an entry for the module and the PkgInfo is flagged as "strict"
343 // then we will not attempt to resolve the module any further, and the resolution will fail.
344 if let Some(pkg_info) = &pkg_info {
345 if pkg_info.strict {
346 return Ok(None);
347 }
348 }
349 }
350
351 //Search the module's resource dir before searching the environment's catalogs
352 // This allows a module to import another module inside its directory or as a peer of itself for
353 // single-file modules, without including an explicit PkgInfo dep entry. On the other hand, If we
354 // want to require module authors to include a dep entry to be explicit about their dependencies, we
355 // can remove this catalog
356 let resource_dir_catalog;
357 let mut local_catalogs = vec![];
358 if let Some(mod_resource_dir) = context.module().resource_dir() {
359 if context.metta.environment().working_dir() != Some(mod_resource_dir) {
360 resource_dir_catalog = DirCatalog::new(PathBuf::from(mod_resource_dir), context.metta().environment().fs_mod_formats.clone());
361 local_catalogs.push(&resource_dir_catalog as &dyn ModuleCatalog);
362 }
363 }
364
365 //Search the catalogs, starting with the resource dir, and continuing to the runner's Environment
366 for catalog in local_catalogs.into_iter().chain(context.metta.environment().catalogs()) {
367 log::trace!("Looking for module: \"{mod_name}\" inside {catalog:?}");
368 match catalog.lookup_newest_with_version_req(mod_name, version_req) {
369 Some(descriptor) => {
370 log::info!("Found module: \"{mod_name}\" inside {:?}", catalog.display_name());
371 log::info!("Preparing to load module: \'{}\' as \'{}\'", descriptor.name, name_path);
372 return Ok(Some((catalog.get_loader(&descriptor)?, descriptor)))
373 },
374 None => {}
375 }
376 }
377
378 Ok(None)
379}
380
381/// Internal function to get a loader for a module at a specific file system path, by trying each FsModuleFormat in order
382pub(crate) fn loader_for_module_at_path<'a, P: AsRef<Path>, FmtIter: Iterator<Item=&'a dyn FsModuleFormat>>(fmts: FmtIter, path: P, name: Option<&str>, search_dir: Option<&Path>) -> Result<Option<(Box<dyn ModuleLoader>, ModuleDescriptor)>, String> {
383
384 //If the path is not an absolute path, assume it's relative to the running search_dir
385 let path = if path.as_ref().is_absolute() {
386 PathBuf::from(path.as_ref())
387 } else {
388 search_dir.ok_or_else(|| format!("Error loading {}. Working directory or module resource dir required to load modules by relative path", path.as_ref().display()))?
389 .join(path)
390 };
391
392 //If a mod name was supplied, we want to make sure it's not a full name path
393 let name = match name {
394 Some(name) => Some(mod_name_from_path(name)),
395 None => None
396 };
397
398 //Check all module formats, to try and load the module at the path
399 for fmt in fmts {
400 if let Some((loader, descriptor)) = fmt.try_path(&path, name) {
401 return Ok(Some((loader, descriptor)))
402 }
403 }
404
405 Err(format!("No module format able to interpret module at {}", path.display()))
406}
407
408/// A loader for a MeTTa module that lives within a single `.metta` file
409#[derive(Debug)]
410pub(crate) struct SingleFileModule {
411 path: PathBuf,
412 pkg_info: PkgInfo,
413}
414
415impl SingleFileModule {
416 fn new(path: &Path, pkg_info: PkgInfo) -> Self {
417 Self {path: path.into(), pkg_info }
418 }
419 fn open_file(&self) -> Result<std::fs::File, String> {
420 std::fs::File::open(&self.path)
421 .map_err(|err| format!("Could not read file, path: {}, error: {}", self.path.display(), err))
422 }
423}
424
425impl ModuleLoader for SingleFileModule {
426 fn load(&self, context: &mut RunContext) -> Result<(), String> {
427
428 let space = GroundingSpace::new();
429 let resource_dir = self.path.parent().unwrap();
430 context.init_self_module(space.into(), Some(resource_dir.into()));
431
432 let parser = SExprParser::new(std::io::BufReader::new(self.open_file()?));
433 context.push_parser(Box::new(parser));
434
435 Ok(())
436 }
437 fn get_resource(&self, res_key: ResourceKey) -> Result<Resource, String> {
438 match res_key {
439 ResourceKey::MainMettaSrc => self.open_file().map(Into::<Resource>::into),
440 ResourceKey::Version => self.pkg_info.version_bytes().map(Into::<Resource>::into),
441 _ => Err("unsupported resource key".to_string())
442 }
443 }
444}
445
446/// A loader for a MeTTa module implemented as a directory
447///
448/// A `DirModule` can contain MeTTa code in a `module.metta` file, but any directory may
449/// be explicitly loaded as a module, making the directory contents available as resources.
450///
451/// See the "Anatomy of a Directory Module" section in the LP-TODO Finish writeup of user-level guide
452#[derive(Debug)]
453pub(crate) struct DirModule {
454 path: PathBuf,
455 pkg_info: PkgInfo,
456}
457
458impl DirModule {
459 fn new(path: &Path, pkg_info: PkgInfo) -> Self {
460 Self { path: path.into(), pkg_info }
461 }
462 fn open_file(&self) -> Result<std::fs::File, String> {
463 let module_metta_path = self.path.join("module.metta");
464 std::fs::File::open(module_metta_path)
465 .map_err(|err| format!("Could not read file, path: {}, error: {}", self.path.display(), err))
466 }
467}
468
469impl ModuleLoader for DirModule {
470 fn load(&self, context: &mut RunContext) -> Result<(), String> {
471
472 let space = GroundingSpace::new();
473 let resource_dir = &self.path;
474 context.init_self_module(space.into(), Some(resource_dir.into()));
475
476 // A module.metta file is optional. Without one a dir module behaves as just
477 // a container for other resources and sub-modules.
478 if let Some(program_file) = self.open_file().ok() {
479 let parser = SExprParser::new(std::io::BufReader::new(program_file));
480 context.push_parser(Box::new(parser));
481 }
482
483 Ok(())
484 }
485 fn get_resource(&self, res_key: ResourceKey) -> Result<Resource, String> {
486 match res_key {
487 ResourceKey::MainMettaSrc => self.open_file()
488 .map_err(|_| format!("no module.metta file found in {} dir module", self.path.display()))
489 .map(Into::<Resource>::into),
490 ResourceKey::Version => self.pkg_info.version_bytes()
491 .map(Into::<Resource>::into),
492 _ => Err("unsupported resource key".to_string())
493 }
494 }
495}
496
497/// Implemented on a type to test if a given file-system path points to a MeTTa module, and to construct
498/// possible paths within a parent directory for a module of a certain name
499///
500/// Objects implementing this trait work with in conjunction with [DirCatalog] and [PkgInfo] to facilitate
501/// loading modules from include directories, specific paths, and remote `git` repositories.
502pub trait FsModuleFormat: std::fmt::Debug + Send + Sync {
503
504 /// Returns the possible paths inside a parent directory which may point to a module
505 ///
506 /// NOTE: This function is allowed to return paths that may not be valid. Paths returned
507 /// from this method will be passed to [Self::try_path] to validate them.
508 fn paths_for_name(&self, parent_dir: &Path, mod_name: &str) -> Vec<PathBuf>;
509
510 /// Checks a specific path, and returns a [ModuleLoader] and a [ModuleDescriptor] if a
511 /// supported module resides at the path
512 ///
513 /// This method should return `None` if the path does not point to a valid module in the
514 /// implemented format.
515 fn try_path(&self, path: &Path, mod_name: Option<&str>) -> Option<(Box<dyn ModuleLoader>, ModuleDescriptor)>;
516}
517
/// Format id mixed into the uid of modules found by [SingleFileModuleFmt]
///
/// Arbitrary number unlikely to be chosen by another FsModuleFormat
const SINGLE_FILE_MOD_FMT_ID: u64 = u64::MAX - 5000;

/// Format id mixed into the uid of modules found by [DirModuleFmt]
///
/// Arbitrary number unlikely to be chosen by another FsModuleFormat
const DIR_MOD_FMT_ID: u64 = u64::MAX - 5001;
523
524/// An object to identify and load a single-file module (naked .metta files)
525#[derive(Debug)]
526pub struct SingleFileModuleFmt;
527
528impl FsModuleFormat for SingleFileModuleFmt {
529 fn paths_for_name(&self, parent_dir: &Path, mod_name: &str) -> Vec<PathBuf> {
530 let base_path = parent_dir.join(mod_name);
531 let extended_path = push_extension(&base_path, ".metta");
532 vec![base_path, extended_path]
533 }
534 fn try_path(&self, path: &Path, mod_name: Option<&str>) -> Option<(Box<dyn ModuleLoader>, ModuleDescriptor)> {
535 if path.is_file() {
536 let mod_name = match mod_name {
537 Some(mod_name) => mod_name,
538 None => path.file_stem().unwrap().to_str().unwrap(), //LP-TODO-NEXT: Unify the code to extract the mod-name from the file name between here and DirModuleFmt::try_path
539 };
540
541 //TODO: parse out the module version here, and pass it to new_with_path_and_fmt_id below
542 //In a single-file module, the discriptor information will be embedded within the MeTTa code
543 // Therefore, we need to parse the whole text of the module looking for a `_pkg-info` atom,
544 // that we can then convert into a PkgInfo structure
545 let pkg_info = PkgInfo::default();
546
547 let descriptor = ModuleDescriptor::new_with_path_and_fmt_id(mod_name.to_string(), None, path, SINGLE_FILE_MOD_FMT_ID);
548 let loader = Box::new(SingleFileModule::new(path, pkg_info));
549 Some((loader, descriptor))
550 } else {
551 None
552 }
553 }
554}
555
556/// An object to identify and load a MeTTa module implemented as a directory
557#[derive(Debug)]
558pub struct DirModuleFmt;
559
560impl FsModuleFormat for DirModuleFmt {
561 fn paths_for_name(&self, parent_dir: &Path, mod_name: &str) -> Vec<PathBuf> {
562 let path = parent_dir.join(mod_name);
563 vec![path]
564 }
565 fn try_path(&self, path: &Path, mod_name: Option<&str>) -> Option<(Box<dyn ModuleLoader>, ModuleDescriptor)> {
566 if path.is_dir() {
567
568 //First see if we can extract a [PkgInfo] from a `pkg-info.json` file
569 let mut pkg_info: Option<PkgInfo> = None;
570 let pkginfo_json_path = path.join("pkg-info.json");
571 if pkginfo_json_path.exists() {
572 let file_contents = std::fs::read_to_string(&pkginfo_json_path).unwrap();
573 pkg_info = Some(serde_json::from_str(&file_contents).unwrap());
574 }
575
576 //TODO: Also check for a `pkg-info.metta` file, as soon as I have implemented Atom-Serde
577 // Also try and parse a `_pkg-info` atom from the `module.metta` file if it's not in a dedicated file
578
579 let pkg_info = pkg_info.unwrap_or_else(|| PkgInfo::default());
580
581 //Get the module name, first use the name provided. If none, then use the name from the
582 // pkg-info, and if that's also none, construct a module name from the file name
583 let full_path;
584 let mod_name = match mod_name {
585 Some(mod_name) => mod_name,
586 None => {
587 match &pkg_info.name {
588 Some(name) => name,
589 None => {
590 //LP-TODO-Next: I need to gracefully create a legal module name from the file name
591 // if the file name happens to contain characters that are illegal in a module name
592 full_path = path.canonicalize().unwrap();
593 full_path.file_stem().unwrap().to_str().unwrap()
594 }
595 }
596 },
597 };
598
599 let version = pkg_info.version.clone();
600 let descriptor = ModuleDescriptor::new_with_path_and_fmt_id(mod_name.to_string(), version, path, DIR_MOD_FMT_ID);
601 let loader = Box::new(DirModule::new(path, pkg_info));
602 return Some((loader, descriptor));
603 }
604 None
605 }
606}
607
608/// Implements ModuleCatalog to load MeTTa modules from a file-system directory trying a number of
609/// [FsModuleFormat] formats in succession
610#[derive(Debug)]
611pub struct DirCatalog {
612 path: PathBuf,
613 fmts: Arc<Vec<Box<dyn FsModuleFormat>>>,
614}
615
616impl DirCatalog {
617 /// Internal function to initialize a DirCatalog for a directory in the module search path
618 pub(crate) fn new(path: PathBuf, fmts: Arc<Vec<Box<dyn FsModuleFormat>>>) -> Self {
619 Self {path, fmts}
620 }
621}
622
623impl ModuleCatalog for DirCatalog {
624 fn display_name(&self) -> String {
625 format!("Dir \"{}\"", self.path.display())
626 }
627 fn lookup(&self, name: &str) -> Vec<ModuleDescriptor> {
628
629 //QUESTION: How should we handle modules with an internal "package-name" that differs from their
630 // name in the file system?
631 //
632 //Cargo treats the module's internal "package-name" as authoritative, but that means it's impossible
633 // to "install" a module simply by dropping it into a directory in the search path, because there
634 // needs to be an index of all available modules in that directory.
635 //
636 //For us, I think we want a less formal approach akin to Python's, where we are allowed to drop a
637 // module into a directory, and import it with a naked `import` statement (i.e. no pkg-info entry)
638 // but for that to work, we need to stipulate that it's possible to infer a file name from a
639 // module name.
640 //
641 //NOTE: This is not a limitation across all catalogs, just the `DirCatalog` If a catalog is able
642 // to maintain its own index of module names, it can store the modules any way it wants to.
643 //
644
645 let mut found_modules = vec![];
646
647 //Inspect the directory using each FsModuleFormat, in order
648 visit_modules_in_dir_using_mod_formats(&self.fmts, &self.path, name, |_loader, descriptor| {
649 found_modules.push(descriptor);
650 true
651 });
652
653 found_modules
654 }
655 fn get_loader(&self, descriptor: &ModuleDescriptor) -> Result<Box<dyn ModuleLoader>, String> {
656
657 let mut matching_module = None;
658 visit_modules_in_dir_using_mod_formats(&self.fmts, &self.path, &descriptor.name, |loader, resolved_descriptor| {
659 if &resolved_descriptor == descriptor {
660 matching_module = Some(loader);
661 true
662 } else {
663 false
664 }
665 });
666
667 match matching_module {
668 Some(loader) => Ok(loader),
669 None => Err(format!("Failed to load module {} in directory {}", &descriptor.name, self.path.display()))
670 }
671 }
672}
673
/// Internal Utility Function. Appends `extension` verbatim to the end of `path`
///
/// Nothing is inserted between the two and any extension the path already has is left in place,
/// so the caller supplies the leading `.` if one is wanted.
fn push_extension(path: &Path, extension: impl AsRef<OsStr>) -> PathBuf {
    let mut extended: OsString = path.as_os_str().to_os_string();
    extended.push(extension);
    PathBuf::from(extended)
}
681
682/// Internal function to try FsModuleFormat formats in order. If the closure returns `true` this function
683/// will exit, otherwise it will try every path returned by every format
684fn visit_modules_in_dir_using_mod_formats(fmts: &[Box<dyn FsModuleFormat>], dir_path: &Path, mod_name: &str, mut f: impl FnMut(Box<dyn ModuleLoader>, ModuleDescriptor) -> bool) {
685
686 for fmt in fmts {
687 for path in fmt.paths_for_name(dir_path, mod_name) {
688 if let Some((loader, descriptor)) = fmt.try_path(&path, Some(mod_name)) {
689 if f(loader, descriptor) {
690 return;
691 }
692 }
693 }
694 }
695}
696
697/// A data structure that uniquely identifies an exact instance of a module
698///
699/// If two modules have the same ModuleDescriptor, they are considered to be the same module
700///
701/// The uid field encodes particulars about a module so it will never be mistaken for another copy
702/// or variation of the module even if the version field is the same. For example, a module loaded
703/// from the file system will use the uid to hash the path, while a module fetched from git will
704/// hash the url and branch.
705#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
706pub struct ModuleDescriptor {
707 name: String,
708 uid: Option<u64>,
709 version: Option<semver::Version>,
710}
711
712impl core::fmt::Display for ModuleDescriptor {
713 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
714 write!(f, "{}", self.name)?;
715 if let Some(version) = &self.version {
716 write!(f, " @{version}")?;
717 }
718 if let Some(uid) = self.uid {
719 write!(f, " #{uid:016x}")?;
720 }
721 Ok(())
722 }
723}
724
725impl ModuleDescriptor {
726 /// Create a new ModuleDescriptor
727 pub fn new(name: String, version: Option<semver::Version>, uid: Option<u64>) -> Self {
728 Self { name, uid, version }
729 }
730 /// Returns a new ModuleDescriptor by computing a stable hash of the `ident` bytes, and using the `fmt_id`
731 pub fn new_with_ident_bytes_and_fmt_id(name: String, version: Option<semver::Version>, ident: &[u8], fmt_id: u64) -> Self {
732 let uid = Self::uid_from_ident_bytes_and_fmt_id(ident, fmt_id);
733 ModuleDescriptor::new(name, version, Some(uid))
734 }
735 /// Create a new ModuleDescriptor using a file system path and another unique id
736 ///
737 /// The descriptor's uid is based on a stable-hash of the path, because a module loaded by
738 /// path shouldn't be substituted for any other module unless it's from the same path.
739 ///
740 /// The purpose of the `fmt_id` is to ensure two different formats or catalogs don't generate
741 /// the same ModuleDescriptor, but you can pass 0 if it doesn't matter
742 pub fn new_with_path_and_fmt_id(name: String, version: Option<semver::Version>, path: &Path, fmt_id: u64) -> Self {
743 Self::new_with_ident_bytes_and_fmt_id(name, version, path.as_os_str().as_encoded_bytes(), fmt_id)
744 }
745 /// Returns the name of the module represented by the ModuleDescriptor
746 pub fn name(&self) -> &str {
747 &self.name
748 }
749 /// Returns the uid associated with the ModuleDescriptor
750 pub fn uid(&self) -> Option<u64> {
751 self.uid
752 }
753 /// Returns the version of the module represented by the ModuleDescriptor
754 pub fn version(&self) -> Option<&semver::Version> {
755 self.version.as_ref()
756 }
757 /// Internal. Use the Hash trait to get a uid for the whole ModuleDescriptor
758 pub fn hash(&self) -> u64 {
759 let mut hasher = DefaultHasher::new();
760 std::hash::Hash::hash(self, &mut hasher);
761 hasher.finish()
762 }
763 /// Returns a uid based on a stable hash of `the ident` bytes, and the fmt_id
764 pub fn uid_from_ident_bytes_and_fmt_id(ident: &[u8], fmt_id: u64) -> u64 {
765 xxh3_64(ident) ^ fmt_id
766 }
767}
768
769/// Extracts the module name from a `.git` URL
770///
771/// For example, `https://github.com/trueagi-io/hyperon-experimental.git` would be parsed
772/// into "hyperon-experimental". Returns None if the form of the URL isn't recognized
773pub fn mod_name_from_url(url: &str) -> Option<String> {
774 let without_ending = url.trim_end_matches("/")
775 .trim_end_matches(".git");
776 let without_mod_name = without_ending.trim_end_matches(|c| c != '/');
777 let mod_name = &without_ending[without_mod_name.len()..];
778 module_name_make_legal(mod_name)
779}
780
781//-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-
782// TESTS
783//-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-=-=-+-
784
#[cfg(test)]
mod tests {
    use super::*;

    /// Stand-in catalog used by `recursive_submodule_import_test`
    ///
    /// A query whose name is exactly one capital letter resolves to a fake module.  Every other
    /// query comes back empty
    #[derive(Debug)]
    struct TestCatalog;

    impl ModuleCatalog for TestCatalog {
        fn lookup(&self, name: &str) -> Vec<ModuleDescriptor> {
            //The length check guarantees a non-empty name, so `all` is never vacuously true
            let is_single_capital = name.len() == 1 && name.chars().all(char::is_uppercase);
            if !is_single_capital {
                return Vec::new();
            }
            vec![ModuleDescriptor::new(name.to_owned(), None, None)]
        }
        fn get_loader(&self, _descriptor: &ModuleDescriptor) -> Result<Box<dyn ModuleLoader>, String> {
            //The catalog doubles as the loader for every module it reports
            Ok(Box::new(Self))
        }
    }

    impl ModuleLoader for TestCatalog {
        fn load(&self, context: &mut RunContext) -> Result<(), String> {
            //Each fake module is nothing more than an empty space
            context.init_self_module(GroundingSpace::new().into(), None);
            Ok(())
        }
    }

    /// Exercises the core code path that recursively loads sub-modules
    #[test]
    fn recursive_submodule_import_test() {

        //Build a runner that has the TestCatalog installed
        let env = EnvBuilder::test_env().push_module_catalog(TestCatalog);
        let metta = Metta::new(Some(env));

        //Import an inner-module, which should pull in every parent it needs along the way
        let import_result = metta.run(SExprParser::new("!(import! &self A:B:C)"));
        assert_eq!(import_result, Ok(vec![vec![expr!()]]));

        //Every module along the path should now be loaded
        for mod_path in ["A", "A:B", "A:B:C"].iter().copied() {
            assert!(metta.get_module_by_name(mod_path).is_ok(), "module `{mod_path}` should be loaded");
        }

        //A module with an invalid parent must fail to load, even though the module itself resolves
        let _result = metta.run(SExprParser::new("!(import! &self a:B)"));
        assert!(metta.get_module_by_name("a:B").is_err());
    }

    //
    //LP-TODO-NEXT, Next make sure the catalogs are able to do the recursive loading from the file system,
    // using their working dirs.  Maybe make this second test a C API test to get better coverage
    //

    //LP-TODO-NEXT, Add a test for loading a module from a DirCatalog by passing a name with an extension (ie. `my_mod.metta`) to `resolve`,
    // and make sure the loaded module that comes back doesn't have the extension

    /// Loader for a module whose [PkgInfo] declares a single dependency located in a git repo
    #[derive(Debug)]
    struct TestLoader {
        pkg_info: PkgInfo,
    }

    impl TestLoader {
        fn new() -> Self {
            //Describe the dependency: no file-system path, only a git location
            let git_dep = DepEntry {
                fs_path: None,
                git_location: ModuleGitLocation {
                    //TODO: We probably want a smaller test repo
                    git_url: Some("https://github.com/trueagi-io/metta-morph/".to_string()),
                    git_branch: None, //Some("Hyperpose".to_string()),
                    git_subdir: None,
                    git_main_file: Some(PathBuf::from("mettamorph.metta")),
                    local_path: None,
                },
                version_req: None,
            };

            //Register the dependency in the module's [PkgInfo] so it knows to load the sub-module from git
            let mut pkg_info = PkgInfo::default();
            pkg_info.name = Some("test-mod".to_string());
            pkg_info.deps.insert("metta-morph-test".to_string(), git_dep);

            Self { pkg_info }
        }
    }

    impl ModuleLoader for TestLoader {
        fn load(&self, context: &mut RunContext) -> Result<(), String> {
            //The module itself is just an empty space; only its PkgInfo is interesting
            context.init_self_module(GroundingSpace::new().into(), None);
            Ok(())
        }
        fn pkg_info(&self) -> Option<&PkgInfo> {
            Some(&self.pkg_info)
        }
    }

    /// Tests that a module can be fetched from git and loaded, when the git URL is specified in
    /// the module's PkgInfo.  This test requires a network connection
    ///
    /// NOTE.  Ignored because we may not want it fetching from the internet when running the
    /// test suite.  Invoke `cargo test --features git git_pkginfo_fetch_test -- --ignored --nocapture` to run it.
    #[ignore]
    #[test]
    fn git_pkginfo_fetch_test() {

        //Build a runner whose config dir is `/tmp/hyperon-test/`, and load the parent module into it
        let env = EnvBuilder::new().set_config_dir(Path::new("/tmp/hyperon-test/"));
        let metta = Metta::new(Some(env));
        let _mod_id = metta.load_module_direct(Box::new(TestLoader::new()), "test-mod").unwrap();

        let import_result = metta.run(SExprParser::new("!(import! &self test-mod:metta-morph-test)"));
        assert_eq!(import_result, Ok(vec![vec![expr!()]]));

        //A function imported from the module should now be usable
        let call_result = metta.run(SExprParser::new("!(sequential (A B))"));
        assert_eq!(call_result, Ok(vec![vec![sym!("A"), sym!("B")]]));

        metta.display_loaded_modules();
    }

    /// Tests that a module can be resolved in a remote catalog, fetched from git and then
    /// loaded.  This test requires a network connection
    ///
    /// NOTE.  Ignored because we may not want it fetching from the internet when running the
    /// test suite.  Invoke `cargo test --features git git_remote_catalog_fetch_test -- --ignored --nocapture` to run it.
    #[ignore]
    #[test]
    fn git_remote_catalog_fetch_test() {

        //Build a runner whose config dir is `/tmp/hyperon-test/`
        let env = EnvBuilder::new().set_config_dir(Path::new("/tmp/hyperon-test/"));
        let metta = Metta::new(Some(env));

        let import_result = metta.run(SExprParser::new("!(import! &self metta-morph)"));
        assert_eq!(import_result, Ok(vec![vec![expr!()]]));

        //A function imported from the module should now be usable
        let call_result = metta.run(SExprParser::new("!(sequential (A B))"));
        assert_eq!(call_result, Ok(vec![vec![sym!("A"), sym!("B")]]));

        metta.display_loaded_modules();
    }
}
926}