Skip to content

Commit 8c594d7

Browse files
esoteric-ephemeraAaron Kaplanjanosh
authored
Update POTCAR summary stats to include 6.4 POTCARs and add dev_script utils for future updates (#3370)
* Added option for 64 POTCARs, hidden func to regenerate potcar_summary_stats file used in validation * Updated potcar_summary_stats.json.gz to include 64 POTCARs * Verify new POTCAR_64 summary_stats work; prep for PR * pre-commit auto-fixes * Added (1) support for LDA 64 POTCARs; (2) ability to generate fake POTCARs from existing POTCARs by randomizing the data contained in them, dev_scripts/potcar_scrambler.py; (3) unit test for pymatgen.io.vasp.inputs._gen_potcar_summary_stats by generating summary stats for a library of fake POTCARs and then checking that the fake set passes PotcarSingle.is_valid with overridden stats * google-style docstring * replace print with warnings.warn * refactor test_gen_potcar_summary_stats using pytest fixtures * rename function arg PMG_VASP_PSP_DIR to vasp_psp_dir * cleanup fake potcar library to only include a few required examples * replace os.system('rm -rf') with shutil.rmtree() and system(f"mkdir -p") with os.makedirs(exist_ok=True) * git mv tests/files/fake_{POTCAR,potcar}_library * generate_fake_potcar_libraries prefix src_dirs with SETTINGS["PMG_VASP_PSP_DIR"] --------- Co-authored-by: Aaron Kaplan <[email protected]> Co-authored-by: Janosh Riebesell <[email protected]>
1 parent 2a43d25 commit 8c594d7

File tree

10 files changed

+258
-13
lines changed

10 files changed

+258
-13
lines changed

dev_scripts/potcar_scrambler.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
from __future__ import annotations
2+
3+
import os
4+
import shutil
5+
import warnings
6+
7+
import numpy as np
8+
from monty.serialization import zopen
9+
10+
from pymatgen.core import SETTINGS
11+
from pymatgen.io.vasp import Potcar, PotcarSingle
12+
from pymatgen.io.vasp.sets import _load_yaml_config
13+
14+
15+
class PotcarScrambler:
    """
    Takes a POTCAR and replaces its values with completely random values.
    Does type matching and attempts precision matching on floats to ensure
    file is read correctly by Potcar and PotcarSingle classes.

    Used to generate copyright-compliant POTCARs for PMG tests.

    In case of questions, contact Aaron Kaplan <[email protected]>.

    Recommended use:
        PotcarScrambler.from_file(
            input_filename = <input POTCAR name as str>,
            output_filename = <name of desired randomized POTCAR as str>
        )
    to generate a POTCAR with name `output_filename` with completely random values
    from existing POTCAR `input_filename`
    """

    def __init__(self, potcars: Potcar | PotcarSingle):
        """Scramble every POTCAR in `potcars` and store the concatenated text.

        Args:
            potcars (Potcar | PotcarSingle): A single POTCAR, or an iterable of
                them. A lone PotcarSingle is wrapped in a one-element list.
        """
        self.PSP_list = [potcars] if isinstance(potcars, PotcarSingle) else potcars
        self.scrambled_potcars_str = "".join(self.scramble_single_potcar(psp) for psp in self.PSP_list)

    def _rand_float_from_str_with_prec(self, input_str: str, bloat: float = 1.5):
        """Draw a random non-negative float with the same number of decimals as `input_str`.

        Args:
            input_str (str): Text that `float()` can parse.
            bloat (float): Scale factor; the draw is uniform on
                [0, max(1, bloat * |float(input_str)|)) before rounding.

        Returns:
            The random value rounded to the number of characters that follow
            the "." in `input_str` (0 when there is no decimal point).

        Raises:
            ValueError: If `input_str` is not parseable by `float()`.
        """
        # Everything after the decimal point counts toward the precision,
        # including an exponent suffix such as "E-04".
        n_prec = len(input_str.partition(".")[2])
        bd = max(1, bloat * abs(float(input_str)))  # never let the range collapse to 0
        return round(bd * np.random.rand(1)[0], n_prec)

    def _read_fortran_str_and_scramble(self, input_str: str, bloat: float = 1.5):
        """Replace one whitespace-delimited POTCAR token by a random value of the same type.

        Args:
            input_str (str): Token to scramble; surrounding whitespace is ignored.
            bloat (float): Scale factor for the range of the random numbers.

        Returns:
            A random bool for logical tokens (t/f/true/false, case-insensitive),
            a random integer carrying the sign of the input for integer tokens,
            a random non-negative float for finite float tokens, and the
            stripped token itself for anything else.
        """
        input_str = input_str.strip()

        if input_str.lower() in {"t", "f", "true", "false"}:
            return bool(np.random.randint(2))

        # Try int first so that signed integers ("-5", "+3") are scrambled as
        # integers rather than falling through to the float code path.
        try:
            integer = int(input_str)
        except ValueError:
            pass
        else:
            fac = int(np.sign(integer))  # return int of same sign
            return fac * np.random.randint(max(1, int(np.ceil(bloat * abs(integer)))))

        try:
            value = float(input_str)
        except ValueError:
            # not a number at all (keyword, "=", date, ...): keep verbatim
            return input_str
        if not np.isfinite(value):
            # words such as "nan" or "inf" parse as floats but are not data
            return input_str
        return self._rand_float_from_str_with_prec(input_str, bloat=bloat)

    def scramble_single_potcar(self, potcar: PotcarSingle):
        """Return the text of `potcar` with every numeric/logical token randomized.

        Lines containing "SHA256" or "COPYR" are dropped, " FAKE" is appended
        to lines containing "TITEL", and the last element of
        `potcar.data.split("\\n")` is discarded.

        Args:
            potcar (PotcarSingle): Object whose `data` attribute holds the POTCAR text.

        Returns:
            str: The scrambled POTCAR text, one newline-terminated line per kept input line.
        """
        scrambled_lines = []
        for line in potcar.data.split("\n")[:-1]:
            if "SHA256" in line or "COPYR" in line:
                # files not copyrighted, remove copyright statement
                # sha256 no longer applicable
                continue

            # A line may hold several ";"-separated statements; scramble each
            # token and re-join all segments with "; ".
            segments = [
                " ".join(f"{self._read_fortran_str_and_scramble(token)}" for token in segment.split())
                for segment in line.split(";")
            ]
            aux_str = " FAKE" if "TITEL" in line else ""
            scrambled_lines.append(f"{'; '.join(segments)}{aux_str}\n")
        return "".join(scrambled_lines)

    def to_file(self, filename: str):
        """Write the scrambled POTCAR text to `filename` via `zopen` in text mode."""
        with zopen(filename, "wt") as f:
            f.write(self.scrambled_potcars_str)

    @staticmethod
    def from_file(input_filename: str, output_filename: str | None = None):
        """Scramble the POTCAR stored in `input_filename`.

        Args:
            input_filename (str): Path of the POTCAR to read with `Potcar.from_file`.
            output_filename (str | None): If given (and non-empty), the
                scrambled POTCAR is also written to this path.

        Returns:
            PotcarScrambler: The scrambler holding the randomized text.
        """
        psp = Potcar.from_file(input_filename)
        psp_scrambled = PotcarScrambler(psp)
        if output_filename:
            psp_scrambled.to_file(output_filename)
        return psp_scrambled
105+
106+
107+
def generate_fake_potcar_libraries():
    """
    Generate a library of scrambled POTCARs in ./fake_potcar_library/.

    To test the `_gen_potcar_summary_stats` function in `pymatgen.io.vasp.inputs`,
    need a library of fake POTCARs which do not violate copyright.

    For every functional directory listed in `PotcarSingle.functional_dir` that
    exists under the PMG_VASP_PSP_DIR setting, each POTCAR named in the
    MPRelaxSet config is scrambled and written to
    ./fake_potcar_library/<functional dir name>/<POTCAR name>/POTCAR.gz.
    Any existing ./fake_potcar_library/ is removed first.

    Raises:
        RuntimeError: If none of the functional directories exists; in that
            case an existing output library is left untouched.
    """
    mp_relax_set = _load_yaml_config("MPRelaxSet")
    psp_variants = list(mp_relax_set["POTCAR"].values())

    output_dir = "./fake_potcar_library/"

    vasp_psp_dir = SETTINGS.get("PMG_VASP_PSP_DIR")
    # functional directory name -> full path of the source library
    src_dirs = {func_dir: f"{vasp_psp_dir}/{func_dir}" for func_dir in PotcarSingle.functional_dir.values()}

    # Verify there is something to read before wiping a previous library.
    if not any(map(os.path.isdir, src_dirs.values())):
        raise RuntimeError(f"No input POTCAR library found, tried {list(src_dirs.values())}")

    shutil.rmtree(output_dir, ignore_errors=True)

    for func_name, func_dir in src_dirs.items():
        if not os.path.isdir(func_dir):
            continue

        for psp_name in psp_variants:
            paths_to_try = [
                f"{func_dir}/POTCAR.{psp_name}",
                f"{func_dir}/POTCAR.{psp_name}.gz",
                f"{func_dir}/{psp_name}/POTCAR",
                f"{func_dir}/{psp_name}/POTCAR.gz",
            ]
            potcar_path = next(filter(os.path.isfile, paths_to_try), None)
            if potcar_path is None:
                warnings.warn(f"Could not find {psp_name} in {paths_to_try}")
                continue

            # Rebase on the functional directory *name* only, so the source
            # path is not reproduced underneath the output directory.
            rebase_dir = os.path.join(output_dir, func_name, psp_name)
            os.makedirs(rebase_dir, exist_ok=True)
            PotcarScrambler.from_file(
                input_filename=potcar_path,
                output_filename=os.path.join(rebase_dir, "POTCAR.gz"),
            )
143+
144+
145+
if __name__ == "__main__":
    # Build the fake POTCAR library only when this file is run as a script,
    # not when it is imported.
    generate_fake_potcar_libraries()

pymatgen/io/vasp/inputs.py

Lines changed: 79 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from monty.json import MontyDecoder, MSONable
2828
from monty.os import cd
2929
from monty.os.path import zpath
30-
from monty.serialization import loadfn
30+
from monty.serialization import dumpfn, loadfn
3131
from tabulate import tabulate
3232

3333
from pymatgen.core import SETTINGS
@@ -1597,13 +1597,26 @@ class PotcarSingle:
15971597
are raised if a POTCAR hash fails validation.
15981598
"""
15991599

1600+
"""
1601+
NB: there are multiple releases of the {LDA,PBE} {52,54} POTCARs
1602+
the original (univie) releases include no SHA256 hashes nor COPYR fields
1603+
in the PSCTR/header field.
1604+
We indicate the older release in `functional_dir` as PBE_52, PBE_54, LDA_52, LDA_54.
1605+
The newer release is indicated as PBE_52_W_HASH, etc.
1606+
"""
16001607
functional_dir = dict(
16011608
PBE="POT_GGA_PAW_PBE",
16021609
PBE_52="POT_GGA_PAW_PBE_52",
1610+
PBE_52_W_HASH="POTPAW_PBE_52",
16031611
PBE_54="POT_GGA_PAW_PBE_54",
1612+
PBE_54_W_HASH="POTPAW_PBE_54",
1613+
PBE_64="POT_PAW_PBE_64",
16041614
LDA="POT_LDA_PAW",
16051615
LDA_52="POT_LDA_PAW_52",
1616+
LDA_52_W_HASH="POTPAW_LDA_52",
16061617
LDA_54="POT_LDA_PAW_54",
1618+
LDA_54_W_HASH="POTPAW_LDA_54",
1619+
LDA_64="POT_LDA_PAW_64",
16071620
PW91="POT_GGA_PAW_PW91",
16081621
LDA_US="POT_LDA_US",
16091622
PW91_US="POT_GGA_US_PW91",
@@ -2106,8 +2119,8 @@ def md5_header_hash(self) -> str:
21062119
def is_valid(self) -> bool:
21072120
"""
21082121
Check that POTCAR matches reference metadata.
2109-
Parsed metadata is stored in self._meta as a human-readable dict,
2110-
self._meta = {
2122+
Parsed metadata is stored in self._summary_stats as a human-readable dict,
2123+
self._summary_stats = {
21112124
"keywords": {
21122125
"header": list[str],
21132126
"data": list[str],
@@ -2135,17 +2148,17 @@ def is_valid(self) -> bool:
21352148
Note also that POTCARs can contain **different** data keywords
21362149
21372150
All keywords found in the header, essentially self.keywords, and the data block
2138-
(<Data Keyword> above) are stored in self._meta["keywords"]
2151+
(<Data Keyword> above) are stored in self._summary_stats["keywords"]
21392152
21402153
To avoid issues of copyright, statistics (mean, mean of abs vals, variance, max, min)
21412154
for the numeric values in the header and data sections of POTCAR are stored
2142-
in self._meta["stats"]
2155+
in self._summary_stats["stats"]
21432156
21442157
tol is then used to match statistical values within a tolerance
21452158
"""
21462159
functional_lexch = {
2147-
"PE": ["PBE", "PBE_52", "PBE_54"],
2148-
"CA": ["LDA", "LDA_52", "LDA_54", "LDA_US", "Perdew_Zunger81"],
2160+
"PE": ["PBE", "PBE_52", "PBE_52_W_HASH", "PBE_54", "PBE_54_W_HASH", "PBE_64"],
2161+
"CA": ["LDA", "LDA_52", "LDA_52_W_HASH", "LDA_54", "LDA_54_W_HASH", "LDA_64", "LDA_US", "Perdew_Zunger81"],
21492162
"91": ["PW91", "PW91_US"],
21502163
}
21512164

@@ -2164,8 +2177,9 @@ def is_valid(self) -> bool:
21642177
)
21652178

21662179
def parse_fortran_style_str(input_str: str) -> Any:
2167-
"""Parse any input string as bool, int, float, or failing that, str. Used to parse FORTRAN-generated
2168-
POTCAR files where it's unknown a priori what type of data will be encountered.
2180+
"""Parse any input string as bool, int, float, or failing that, str.
2181+
Used to parse FORTRAN-generated POTCAR files where it's unknown
2182+
a priori what type of data will be encountered.
21692183
"""
21702184
input_str = input_str.strip()
21712185

@@ -2225,7 +2239,9 @@ def data_stats(data_list: Sequence) -> dict:
22252239
"MAX": arr.max(),
22262240
}
22272241

2228-
summary_stats = { # for this PotcarSingle instance
2242+
# NB: to add future summary stats in a way that's consistent with PMG,
2243+
# it's easiest to save the summary stats as an attr of PotcarSingle
2244+
self._summary_stats = { # for this PotcarSingle instance
22292245
"keywords": {
22302246
"header": [kwd.lower() for kwd in self.keywords],
22312247
"data": psp_keys,
@@ -2239,12 +2255,12 @@ def data_stats(data_list: Sequence) -> dict:
22392255
data_match_tol = 1e-6
22402256
for ref_psp in possible_potcar_matches:
22412257
key_match = all(
2242-
set(ref_psp["keywords"][key]) == set(summary_stats["keywords"][key]) # type: ignore
2258+
set(ref_psp["keywords"][key]) == set(self._summary_stats["keywords"][key]) # type: ignore
22432259
for key in ["header", "data"]
22442260
)
22452261

22462262
data_diff = [
2247-
abs(ref_psp["stats"][key][stat] - summary_stats["stats"][key][stat]) # type: ignore
2263+
abs(ref_psp["stats"][key][stat] - self._summary_stats["stats"][key][stat]) # type: ignore
22482264
for stat in ["MEAN", "ABSMEAN", "VAR", "MIN", "MAX"]
22492265
for key in ["header", "data"]
22502266
]
@@ -2274,6 +2290,57 @@ def __repr__(self) -> str:
22742290
return f"{cls_name}({symbol=}, {functional=}, {TITEL=}, {VRHFIN=}, {n_valence_elec=:.0f})"
22752291

22762292

2293+
def _gen_potcar_summary_stats(
    append: bool = False,
    vasp_psp_dir: str | None = None,
    summary_stats_filename: str = f"{module_dir}/potcar_summary_stats.json.gz",
):
    """
    This function is solely intended to be used for PMG development to regenerate the
    potcar_summary_stats.json.gz file used to validate POTCARs.

    THIS FUNCTION IS DESTRUCTIVE. It rewrites `summary_stats_filename`: with
    append=False the previous content is discarded entirely; with append=True the
    previous content is loaded first and entries with the same functional and
    TITEL are replaced.

    Args:
        append (bool): Change whether data is appended to the existing potcar_summary_stats.json.gz,
            or if a completely new file is generated. Defaults to False.
        vasp_psp_dir (str | None): Change where this function searches for POTCARs.
            Falls back to the PMG_VASP_PSP_DIR setting if not set. Defaults to None.
        summary_stats_filename (str): Name of the output summary stats file. Defaults to
            '<pymatgen_install_dir>/io/vasp/potcar_summary_stats.json.gz'.

    Raises:
        ValueError: If no POTCAR directory is given and PMG_VASP_PSP_DIR is not set.
            Raised before anything is written.
    """
    vasp_psp_dir = vasp_psp_dir or SETTINGS.get("PMG_VASP_PSP_DIR")
    if not vasp_psp_dir:
        # Without this guard the search would run in "None/..." and the stats
        # file would be overwritten with an empty dict.
        raise ValueError("No POTCAR directory found: pass vasp_psp_dir or set PMG_VASP_PSP_DIR.")

    # functional name -> directory name, restricted to libraries present on disk
    func_dir_exist: dict[str, str] = {}
    for func, func_dir in PotcarSingle.functional_dir.items():
        if os.path.isdir(f"{vasp_psp_dir}/{func_dir}"):
            func_dir_exist[func] = func_dir
        else:
            warnings.warn(f"missing {func_dir} POTCAR directory")

    # use append = True if a new POTCAR library is released to add new summary stats
    # without completely regenerating the dict of summary stats
    # use append = False to completely regenerate the summary stats dict
    new_summary_stats = loadfn(summary_stats_filename) if append else {}

    for func, func_dir in func_dir_exist.items():
        new_summary_stats.setdefault(func, {})  # initialize dict if key missing

        # POTCARs may sit directly in the functional directory or one level below it
        potcar_list = [
            *glob(f"{vasp_psp_dir}/{func_dir}/POTCAR*"),
            *glob(f"{vasp_psp_dir}/{func_dir}/*/POTCAR*"),
        ]
        for potcar in potcar_list:
            psp = PotcarSingle.from_file(potcar)
            # NOTE(review): `_summary_stats` appears to be assigned inside
            # `PotcarSingle.is_valid`; confirm it is always populated after `from_file`.
            new_summary_stats[func][psp.TITEL.replace(" ", "")] = {
                "LEXCH": psp.LEXCH,
                "VRHFIN": psp.VRHFIN.replace(" ", ""),
                **psp._summary_stats,
            }

    dumpfn(new_summary_stats, summary_stats_filename)
2342+
2343+
22772344
class Potcar(list, MSONable):
22782345
"""
22792346
Object for reading and writing POTCAR files for calculations. Consists of a
196 KB
Binary file not shown.
85.7 KB
Binary file not shown.
103 KB
Binary file not shown.
85.2 KB
Binary file not shown.
86.1 KB
Binary file not shown.
104 KB
Binary file not shown.
85.4 KB
Binary file not shown.

tests/io/vasp/test_inputs.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@
1010
import pytest
1111
import scipy.constants as const
1212
from monty.io import zopen
13+
from monty.serialization import loadfn
1314
from numpy.testing import assert_allclose
14-
from pytest import approx
15+
from pytest import MonkeyPatch, approx
1516

1617
from pymatgen.core import SETTINGS
1718
from pymatgen.core.composition import Composition
@@ -26,6 +27,7 @@
2627
PotcarSingle,
2728
UnknownPotcarWarning,
2829
VaspInput,
30+
_gen_potcar_summary_stats,
2931
)
3032
from pymatgen.util.testing import TEST_FILES_DIR, PymatgenTest
3133

@@ -1210,3 +1212,33 @@ def test_from_directory(self):
12101212
dct = vi.as_dict()
12111213
vasp_input = VaspInput.from_dict(dct)
12121214
assert "CONTCAR.Li2O" in vasp_input
1215+
1216+
1217+
def test_gen_potcar_summary_stats(tmp_path: Path, monkeypatch: MonkeyPatch):
    """Build summary stats from the scrambled POTCAR library and validate the library against them."""
    psp_path = f"{TEST_FILES_DIR}/fake_potcar_library/"
    summ_stats_file = f"{tmp_path}/fake_potcar_summary_stats.json.gz"
    _gen_potcar_summary_stats(append=False, vasp_psp_dir=psp_path, summary_stats_filename=summ_stats_file)

    # the fake POTCAR library is big, so only two functional directories are shipped
    expected_funcs = {"LDA_64", "PBE_54_W_HASH"}
    summ_stats = loadfn(summ_stats_file)
    assert set(summ_stats) == expected_funcs

    # PotcarSingle.is_valid needs an entry for every functional, so reuse the
    # two generated sets of stats for all functionals that were not generated
    missing_funcs = [func for func in PotcarSingle.functional_dir if func not in expected_funcs]
    for func in missing_funcs:
        lowered = func.lower()
        if "pbe" in lowered or "pw91" in lowered:
            summ_stats[func] = summ_stats["PBE_54_W_HASH"].copy()
        elif "lda" in lowered or "perdew_zunger81" in lowered:
            summ_stats[func] = summ_stats["LDA_64"].copy()

    # swap the reference potcar_summary_stats for the fake data
    monkeypatch.setattr(PotcarSingle, "potcar_summary_stats", summ_stats)

    for func in expected_funcs:
        bdir = f"{psp_path}/{PotcarSingle.functional_dir[func]}"
        for entry in os.listdir(bdir):
            # skip hidden entries and anything that is not an element directory
            if entry[0] == "." or not os.path.isdir(f"{bdir}/{entry}"):
                continue
            assert PotcarSingle.from_file(f"{bdir}/POTCAR.{entry}.gz").is_valid

0 commit comments

Comments
 (0)