Source code for PyGamLab.databases.COD


import math
import requests
from typing import List, Dict, Union
import warnings
import re
from collections import defaultdict

class COD_Explorer:
    """
    Python wrapper for the Crystallography Open Database (COD).

    Provides methods to search materials by chemical formula, retrieve
    COD IDs, and fetch crystallographic information files (CIFs).

    Attributes
    ----------
    timeout : int
        Request timeout in seconds for HTTP queries.
    url : str
        Base URL of the COD website (ends with a trailing slash).
    api_url : str
        Endpoint URL for COD search queries.
    formula : str
        Last chemical formula processed by a search, in COD format.
        Empty string until the first search.
    all_elements : list[str]
        List of all recognized chemical elements (1- and 2-letter symbols).

    Examples
    --------
    IDs and CIF content below are illustrative; they require network access.

    >>> explorer = COD_Explorer(timeout=30)
    >>> ids = explorer.search_materials("TiO2")
    >>> structure = explorer.fetch_structure(ids[0])
    >>> print(structure['cif'][:200])  # Print first 200 chars of CIF
    """

    all_elements = [
        "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne",
        "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar",
        "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn",
        "Ga", "Ge", "As", "Se", "Br", "Kr",
        "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd",
        "In", "Sn", "Sb", "Te", "I", "Xe",
        "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", "Tb", "Dy",
        "Ho", "Er", "Tm", "Yb", "Lu",
        "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi",
        "Po", "At", "Rn",
        "Fr", "Ra", "Ac", "Th", "Pa", "U", "Np", "Pu", "Am", "Cm", "Bk", "Cf",
        "Es", "Fm", "Md", "No", "Lr",
        "Rf", "Db", "Sg", "Bh", "Hs", "Mt", "Ds", "Rg", "Cn", "Nh", "Fl", "Mc",
        "Lv", "Ts", "Og",
    ]

    def __init__(self, timeout: int = 60):
        """
        Initialize a COD_Explorer instance.

        Parameters
        ----------
        timeout : int, optional
            Timeout in seconds for HTTP requests (default is 60).

        Example
        -------
        >>> explorer = COD_Explorer(timeout=30)
        """
        self.timeout = timeout
        self.url = "https://www.crystallography.net/"
        # ``self.url`` already ends with "/", so no extra separator is added
        # (the previous form produced ".net//cod/result").
        self.api_url = f"{self.url}cod/result"
        self.formula = ''

    def search_materials(self, formula: str) -> List[int]:
        """
        Search COD for materials matching a chemical formula.

        Parameters
        ----------
        formula : str
            Chemical formula (e.g., 'TiO2', 'C6H6').

        Returns
        -------
        List[int]
            List of COD IDs corresponding to structures matching the formula.

        Raises
        ------
        ValueError
            If the formula is empty or cannot be parsed.
        requests.HTTPError
            If the COD server returns an error status.

        Example
        -------
        >>> explorer = COD_Explorer()
        >>> ids = explorer.search_materials("C6H6")
        """
        return self._get_cod_ids(formula)

    def _get_cod_ids(self, formula: str) -> List[int]:
        """
        Internal method to query COD and return IDs for a given formula.

        The formula is first converted with ``_get_cod_format_formula`` and
        sent as the ``formula`` query parameter together with
        ``format=json``.

        Parameters
        ----------
        formula : str
            Chemical formula in standard notation.

        Returns
        -------
        List[int]
            The ``"file"`` field of every entry in the JSON response,
            converted to ``int``.

        Raises
        ------
        ValueError
            If the formula is empty or cannot be parsed.
        requests.HTTPError
            If the COD server returns an error status.
        """
        cod_formula = self._get_cod_format_formula(formula)
        params = {"formula": cod_formula, "format": "json"}
        response = requests.get(self.api_url, params=params, timeout=self.timeout)
        response.raise_for_status()
        return [int(entry["file"]) for entry in response.json()]

    def _corrected_formula(self, comp, ordered):
        """
        Convert element counts into a COD-compatible spaced formula.

        Parameters
        ----------
        comp : dict
            Mapping of element symbols to quantities.
        ordered : list[str]
            List of element symbols in the desired output order.

        Returns
        -------
        str
            Space-separated formula. A quantity of exactly 1 (or 0) is
            written as the bare symbol, integral quantities are written
            without a decimal point, and fractional quantities are kept.

        Example
        -------
        >>> explorer = COD_Explorer()
        >>> explorer._corrected_formula({'C': 6, 'H': 6, 'O': 1}, ['C', 'H', 'O'])
        'C6 H6 O'
        >>> explorer._corrected_formula({'Fe': 0.5, 'O': 1}, ['Fe', 'O'])
        'Fe0.5 O'
        """
        parts = []
        for symbol in ordered:
            # Rounding absorbs float noise from summing repeated elements
            # (e.g. 0.1 + 0.2) before the integrality test.
            amount = round(float(comp[symbol]), 10)
            if amount in (0, 1):
                parts.append(f"{symbol}")
            elif amount.is_integer():
                parts.append(f"{symbol}{int(amount)}")
            else:
                # Fractional occupancy: keep it instead of truncating.
                parts.append(f"{symbol}{amount:.10g}")
        return " ".join(parts)

    def _parse_composition(self, text, case_sensitive):
        """
        Tokenize ``text`` into a mapping of element symbol -> total amount.

        Parameters
        ----------
        text : str
            Formula with all whitespace already removed.
        case_sensitive : bool
            If True, a symbol must be an upper-case letter optionally
            followed by one lower-case letter. If False, letter case is
            ignored and the longest (two-letter) match is preferred.

        Returns
        -------
        collections.defaultdict
            Element symbols mapped to summed float amounts, in order of
            first appearance.

        Raises
        ------
        ValueError
            If an unknown element or a malformed number is encountered.
        """
        known = frozenset(self.all_elements)
        comp = defaultdict(float)
        i = 0
        n = len(text)
        while i < n:
            pair = text[i:i + 2]
            single = text[i]
            if case_sensitive:
                two_ok = (
                    len(pair) == 2
                    and single.isupper()
                    and pair[1].islower()
                    and pair in known
                )
            else:
                pair = pair.capitalize()
                single = single.capitalize()
                two_ok = len(pair) == 2 and pair in known

            if two_ok:
                el = pair
                i += 2
            elif single in known:
                el = single
                i += 1
            else:
                raise ValueError(f"Unknown element starting at '{text[i:]}'")

            # Optional quantity directly after the symbol.
            start = i
            while i < n and (text[i].isdigit() or text[i] == "."):
                i += 1
            num = text[start:i]
            if num:
                try:
                    amt = float(num)
                except ValueError:
                    raise ValueError(
                        f"Invalid quantity '{num}' for element '{el}'"
                    ) from None
            else:
                amt = 1.0
            comp[el] += amt
        return comp

    def _get_cod_format_formula(self, formula: str) -> str:
        """
        Parse a chemical formula and return a COD-compatible format.

        The formula is first parsed respecting letter case, so that
        correctly capitalized input such as 'CO2' is read as carbon plus
        oxygen rather than cobalt. If that fails (e.g. 'tio2' or 'TIO2'),
        it is parsed case-insensitively, preferring two-letter symbols.
        The result is Hill-ordered (C, then H, then alphabetical when
        carbon is present; purely alphabetical otherwise) and spaced.
        Parentheses and other grouping syntax are not supported.

        The result is also stored on ``self.formula``.

        Parameters
        ----------
        formula : str
            Input chemical formula (e.g., 'TiO2', 'C6H6').

        Returns
        -------
        str
            Formatted COD-compatible chemical formula.

        Raises
        ------
        ValueError
            If the formula is empty, contains an unknown element, or
            contains a malformed number.

        Example
        -------
        >>> explorer = COD_Explorer()
        >>> explorer._get_cod_format_formula("H2O")
        'H2 O'
        >>> explorer._get_cod_format_formula("TiO2")
        'O2 Ti'
        >>> explorer._get_cod_format_formula("CO2")
        'C O2'
        """
        compact = "".join(formula.split())
        if not compact:
            raise ValueError("Chemical formula must not be empty")

        try:
            comp = self._parse_composition(compact, case_sensitive=True)
        except ValueError:
            comp = None
        if comp is None:
            # Fall back to the lenient parse; its error (if any) is the one
            # reported to the caller.
            comp = self._parse_composition(compact.lower(), case_sensitive=False)

        # Hill ordering
        els = list(comp.keys())
        if "C" in els:
            ordered = ["C"]
            if "H" in els:
                ordered.append("H")
            ordered += sorted(e for e in els if e not in {"C", "H"})
        else:
            ordered = sorted(els)

        final = self._corrected_formula(comp, ordered)
        self.formula = final
        return final

    def _get_cif_by_id(self, cod_id: int) -> str:
        """
        Retrieve the raw CIF text for a given COD ID.

        Parameters
        ----------
        cod_id : int
            COD ID of the desired crystal structure.

        Returns
        -------
        str
            Body of the response for ``<url>cod/<cod_id>.cif``.

        Raises
        ------
        requests.HTTPError
            If the COD server returns an error status.

        Example
        -------
        >>> explorer = COD_Explorer()
        >>> cif_text = explorer._get_cif_by_id(856789)
        >>> print(cif_text[:200])
        """
        response = requests.get(f"{self.url}cod/{cod_id}.cif", timeout=self.timeout)
        response.raise_for_status()
        return response.text

    def fetch_structure(self, cod_id: int) -> dict:
        """
        Fetch structure data for a given COD ID.

        Parameters
        ----------
        cod_id : int
            COD ID of the structure to fetch.

        Returns
        -------
        dict
            Dictionary containing:
            - 'formula': the value of ``self.formula``, i.e. the formula of
              the most recent search (empty string if no search was made).
              It is not derived from the downloaded CIF.
            - 'cod_id': COD ID of the structure
            - 'cif': Raw CIF content as a string

        Raises
        ------
        requests.HTTPError
            If the COD server returns an error status.

        Example
        -------
        >>> explorer = COD_Explorer()
        >>> ids = explorer.search_materials("TiO2")
        >>> structure = explorer.fetch_structure(ids[0])
        >>> print(structure['formula'])
        O2 Ti
        """
        return {
            'formula': self.formula,
            'cod_id': cod_id,
            'cif': self._get_cif_by_id(cod_id),
        }

    def fetch_all_data(self, cod_id: int) -> dict:
        """
        Fetch all available data for a given COD ID.

        Parameters
        ----------
        cod_id : int
            COD ID of the structure to fetch.

        Returns
        -------
        dict
            Dictionary with the keys 'formula', 'cod_id' and 'cif', exactly
            as returned by ``fetch_structure``.

        Notes
        -----
        Currently delegates to ``fetch_structure``; kept as a separate
        entry point for future expansion to more COD metadata fields.

        Example
        -------
        >>> explorer = COD_Explorer()
        >>> data = explorer.fetch_all_data(856789)
        >>> print(data.keys())
        dict_keys(['formula', 'cod_id', 'cif'])
        """
        return self.fetch_structure(cod_id)