Speed up emission plot from packets
lukeshingles committed Feb 14, 2024
1 parent cc85001 commit 1c41846
Showing 4 changed files with 103 additions and 26 deletions.
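
The main change: instead of labelling emission types by calling Python lambdas through map_elements for every unique value, the ion-level labels are now built as small polars frames and joined onto the packet LazyFrame. A minimal sketch of the pattern (toy values, not the artistools API):

import polars as pl

packets = pl.LazyFrame({"emissiontype": [3, 0, 3, -9999999]})
# one label row per emission-type code, built once up front
labels = pl.LazyFrame(
    {"emissiontype": [0, 3, -9999999], "emissiontype_str": ["Fe II bound-bound", "Co II bound-bound", "free-free"]}
)
# a single left join replaces a per-value Python callback
packets = packets.join(labels, on="emissiontype", how="left")
print(packets.collect())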
2 changes: 2 additions & 0 deletions artistools/__init__.py
@@ -55,13 +55,15 @@
from artistools.misc import get_deposition
from artistools.misc import get_dirbin_labels
from artistools.misc import get_elsymbol
from artistools.misc import get_elsymbols_df
from artistools.misc import get_elsymbolslist
from artistools.misc import get_escaped_arrivalrange
from artistools.misc import get_file_metadata
from artistools.misc import get_filterfunc
from artistools.misc import get_grid_mapping
from artistools.misc import get_inputparams
from artistools.misc import get_ion_tuple
from artistools.misc import get_ionstage_roman_numeral_df
from artistools.misc import get_ionstring
from artistools.misc import get_linelist_dataframe
from artistools.misc import get_linelist_pldf
75 changes: 65 additions & 10 deletions artistools/misc.py
@@ -606,6 +606,20 @@ def get_elsymbolslist() -> list[str]:
]


def get_elsymbols_df() -> pl.DataFrame:
"""Return a polars DataFrame of atomic number and element symbols."""
return (
pl.read_csv(
at.get_config()["path_datadir"] / "elements.csv",
separator=",",
has_header=True,
dtypes={"Z": pl.Int32},
)
.drop("name")
.rename({"symbol": "elsymbol", "Z": "atomic_number"})
)
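
A hedged usage sketch for the new helper (assumes elements.csv is present in the configured path_datadir; the Int32 schema matches the dtype of the "Z" column above so the join keys agree):

import polars as pl
import artistools as at

df = pl.DataFrame({"atomic_number": [26, 27, 28]}, schema={"atomic_number": pl.Int32})
df = df.join(at.get_elsymbols_df(), on="atomic_number", how="left")
# adds an "elsymbol" column: Fe, Co, Ni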


def get_atomic_number(elsymbol: str) -> int:
"""Return the atomic number of an element symbol."""
assert elsymbol is not None
@@ -625,6 +639,17 @@ def decode_roman_numeral(strin: str) -> int:
return -1


def get_ionstage_roman_numeral_df() -> pl.DataFrame:
"""Return a polars DataFrame of ionisation stage and roman numerals."""
return pl.DataFrame(
{
"ionstage": list(range(1, len(roman_numerals))),
"ionstage_roman": roman_numerals[1:],
},
schema={"ionstage": pl.Int32, "ionstage_roman": pl.Utf8},
)
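
A hedged usage sketch (the Int32 schema matches the dtype defined in the schema above so the join keys agree):

import polars as pl
import artistools as at

df = pl.DataFrame({"ionstage": [1, 2, 3]}, schema={"ionstage": pl.Int32})
df = df.join(at.get_ionstage_roman_numeral_df(), on="ionstage", how="left")
# adds an "ionstage_roman" column: I, II, III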


def get_elsymbol(atomic_number: int | np.int64) -> str:
"""Return the element symbol of an atomic number."""
return get_elsymbolslist()[atomic_number]
@@ -946,7 +971,7 @@ def merge_pdf_files(pdf_files: list[str]) -> None:
print(f"Files merged and saved to {resultfilename}.pdf")


def get_bflist(modelpath: Path | str) -> pl.DataFrame:
def get_bflist(modelpath: Path | str, get_ion_str: bool = False) -> pl.LazyFrame:
"""Return a dict of bound-free transitions from bflist.out."""
compositiondata = get_composition_data(modelpath)
bflistpath = firstexisting(["bflist.out", "bflist.dat"], folder=modelpath, tryzipped=True)
@@ -955,24 +980,37 @@ def get_bflist(modelpath: Path | str) -> pl.DataFrame:
dfboundfree = pl.read_csv(
bflistpath,
skip_rows=1,
has_header=False,
separator=" ",
new_columns=["i", "elementindex", "ionindex", "lowerlevel", "upperionlevel"],
new_columns=["bfindex", "elementindex", "ionindex", "lowerlevel", "upperionlevel"],
dtypes={
"i": pl.Int32,
"bfindex": pl.Int32,
"elementindex": pl.Int32,
"ionindex": pl.Int32,
"lowerlevel": pl.Int32,
"upperionlevel": pl.Int32,
},
)
).lazy()

dfboundfree = dfboundfree.with_columns(
atomic_number=pl.col("elementindex").map_elements(lambda elementindex: compositiondata["Z"][elementindex]),
ionstage=pl.col("ionindex")
+ pl.col("elementindex").map_elements(lambda elementindex: compositiondata["lowermost_ionstage"][elementindex]),
)
return dfboundfree.drop(["elementindex", "ionindex"]).select(
["atomic_number", "ionstage", "lowerlevel", "upperionlevel"]
)

dfboundfree = dfboundfree.drop(["elementindex", "ionindex"])

if get_ion_str:
dfgroupstrings = dfboundfree.group_by(["atomic_number", "ionstage"]).agg(
pl.map_groups(
exprs=[pl.col("atomic_number").first(), pl.col("ionstage").first()],
function=lambda z_ionstage: f"{at.get_ionstring(z_ionstage[0].item(), z_ionstage[1].item())} bound-free",
).alias("ion_str"),
)

dfboundfree = dfboundfree.join(dfgroupstrings, how="left", on=["atomic_number", "ionstage"])

return dfboundfree
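
A hedged usage sketch of the reworked function ("mymodel" is a hypothetical model directory):

import artistools as at

bfl = at.get_bflist("mymodel", get_ion_str=True).collect()
# columns: bfindex, lowerlevel, upperionlevel, atomic_number, ionstage, ion_str
# with ion_str values such as "Fe II bound-free"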


linetuple = namedtuple("linetuple", "lambda_angstroms atomic_number ionstage upperlevelindex lowerlevelindex")
@@ -1002,7 +1040,7 @@ def read_linestatfile(filepath: Path | str) -> tuple[int, list[float], list[int]
return nlines, lambda_angstroms, atomic_numbers, ionstages, upper_levels, lower_levels


def get_linelist_pldf(modelpath: Path | str) -> pl.LazyFrame:
def get_linelist_pldf(modelpath: Path | str, get_ion_str: bool = False) -> pl.LazyFrame:
textfile = at.firstexisting("linestat.out", folder=modelpath)
parquetfile = Path(modelpath, "linelist.out.parquet")
if not parquetfile.is_file() or parquetfile.stat().st_mtime < textfile.stat().st_mtime:
@@ -1018,20 +1056,37 @@ def get_linelist_pldf(modelpath: Path | str) -> pl.LazyFrame:
"lower_level": lower_levels,
},
)
# .with_columns(pl.col(pl.Int64).cast(pl.Int32))
.with_columns(pl.col(pl.Int64).cast(pl.Int32))
.with_row_count(name="lineindex")
)
pldf.write_parquet(parquetfile, compression="zstd")
print(f"Saved {parquetfile}")
else:
print(f"Reading {parquetfile}")

return (
linelist_lazy = (
pl.scan_parquet(parquetfile)
.with_columns(upperlevelindex=pl.col("upper_level") - 1, lowerlevelindex=pl.col("lower_level") - 1)
.drop(["upper_level", "lower_level"])
.with_columns(pl.col(pl.Int64).cast(pl.Int32))
)

if get_ion_str:
linelist_lazy = (
linelist_lazy.group_by(["atomic_number", "ionstage"])
.agg(
pl.struct(pl.exclude(["atomic_number", "ionstage"])).alias("othercols"),
pl.map_groups(
exprs=[pl.col("atomic_number").first(), pl.col("ionstage").first()],
function=lambda z_ionstage: f"{at.get_ionstring(z_ionstage[0].item(), z_ionstage[1].item())} bound-bound",
).alias("ion_str"),
)
.explode("othercols")
.unnest("othercols")
)

return linelist_lazy
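
A hedged usage sketch of the new get_ion_str option ("mymodel" is a hypothetical model directory):

import artistools as at

linelist = at.get_linelist_pldf("mymodel", get_ion_str=True)
# lazily attach per-line ion labels, then collect only the columns needed
labels = linelist.select(["lineindex", "lambda_angstroms", "ion_str"]).collect()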


@lru_cache(maxsize=8)
def get_linelist_dataframe(
2 changes: 1 addition & 1 deletion artistools/packets/packets.py
@@ -13,7 +13,7 @@
import artistools as at

# for the parquet files
time_parquetschemachange = (2023, 11, 19, 12, 0, 0)
time_parquetschemachange = (2024, 2, 14, 11, 0, 0)

CLIGHT = 2.99792458e10
DAY = 86400
50 changes: 35 additions & 15 deletions artistools/spectra/spectra.py
@@ -814,16 +814,22 @@ def get_flux_contributions_from_packets(
if groupby in {"terms", "upperterm"}:
adata = at.atomic.get_levels(modelpath)

linelist = at.get_linelist_pldf(modelpath=modelpath).collect()
bflist = at.get_bflist(modelpath)
emtypecolumn = "emissiontype" if use_lastemissiontype else "trueemissiontype"

linelistlazy = at.get_linelist_pldf(modelpath=modelpath, get_ion_str=True)
bflistlazy = at.get_bflist(modelpath, get_ion_str=True)

if groupby != "ion":
linelist = linelistlazy.collect()
bflist = bflistlazy.collect()

def get_emprocesslabel(emtype: int) -> str:
if emtype >= 0:
line = linelist.row(emtype, named=True)

if groupby == "line":
return (
f"{at.get_ionstring(line['atomic_number'], line['ionstage'])} "
f"{line['ion_str']} "
f"λ{line['lambda_angstroms']:.0f} "
f"({line['upperlevelindex']}-{line['lowerlevelindex']})"
)
@@ -834,15 +840,15 @@ def get_emprocesslabel(emtype: int) -> str:
upper_term_noj = upper_config.split("_")[-1].split("[")[0]
lower_config = ion.levels.iloc[line["lowerlevelindex"]].levelname
lower_term_noj = lower_config.split("_")[-1].split("[")[0]
return f"{at.get_ionstring(line['atomic_number'], line['ionstage'])} {upper_term_noj}->{lower_term_noj}"
return f"{line['ion_str']} {upper_term_noj}->{lower_term_noj}"

if groupby == "upperterm":
ion = adata[(adata["Z"] == line["atomic_number"]) & (adata["ionstage"] == line["ionstage"])].iloc[0]
upper_config = ion.levels.iloc[line["upperlevelindex"]].levelname
upper_term_noj = upper_config.split("_")[-1].split("[")[0]
return f"{at.get_ionstring(line['atomic_number'], line['ionstage'])} {upper_term_noj}"
return f"{line['ion_str']} {upper_term_noj}"

return f"{at.get_ionstring(line['atomic_number'], line['ionstage'])} bound-bound"
return line["ion_str"]

if emtype == -9999999:
return "free-free"
@@ -869,11 +875,10 @@ def get_absprocesslabel(abstype: int) -> str:
return "free-free"
return "bound-free" if abstype == -2 else "? other absorp."

emtypecolumn = "emissiontype" if use_lastemissiontype else "trueemissiontype"

nprocs_read, lzdfpackets = at.packets.get_packets_pl(
modelpath, maxpacketfiles=maxpacketfiles, packet_type="TYPE_ESCAPE", escape_type="TYPE_RPKT"
)

if emissionvelocitycut is not None:
lzdfpackets = at.packets.add_derived_columns_lazy(lzdfpackets)
lzdfpackets = lzdfpackets.filter(pl.col("emission_velocity") > emissionvelocitycut)
@@ -885,13 +890,28 @@ def get_absprocesslabel(abstype: int) -> str:

if getemission:
cols |= {"emissiontype_str", "nu_rf"}

emtypes = lzdfpackets.select(emtypecolumn).collect().get_column(emtypecolumn).unique().sort()
lzdfpackets = lzdfpackets.join(
pl.DataFrame({emtypecolumn: emtypes, "emissiontype_str": emtypes.map_elements(get_emprocesslabel)}).lazy(),
on=emtypecolumn,
how="left",
)
if groupby == "ion":
emtypestrings = pl.concat(
[
linelistlazy.select([pl.col("lineindex").cast(pl.Int32).alias(emtypecolumn), pl.col("ion_str")]),
pl.DataFrame(
{emtypecolumn: [-9999999], "ion_str": "free-free"},
schema_overrides={emtypecolumn: pl.Int32, "ion_str": pl.String},
).lazy(),
bflistlazy.select([(-1 - pl.col("bfindex").cast(pl.Int32)).alias(emtypecolumn), pl.col("ion_str")]),
],
).rename({"ion_str": "emissiontype_str"})

lzdfpackets = lzdfpackets.join(emtypestrings, on=emtypecolumn, how="left")
else:
emtypes = lzdfpackets.select(emtypecolumn).collect().get_column(emtypecolumn).unique().sort()
lzdfpackets = lzdfpackets.join(
pl.DataFrame(
{emtypecolumn: emtypes, "emissiontype_str": emtypes.map_elements(get_emprocesslabel)}
).lazy(),
on=emtypecolumn,
how="left",
)
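
For groupby == "ion", the emtypestrings frame above covers every emission-type code without collecting the packet data or calling a Python function per value. A small sketch of the code encoding it relies on (illustration only, not the artistools API):

def decode_emtype(emtype: int) -> str:
    # non-negative codes are bound-bound line indices
    if emtype >= 0:
        return f"bound-bound line {emtype}"
    # the sentinel -9999999 marks free-free emission
    if emtype == -9999999:
        return "free-free"
    # remaining negative codes map to bound-free entries via bfindex = -1 - emtype
    return f"bound-free index {-1 - emtype}"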

if getabsorption:
cols |= {"absorptiontype_str", "absorption_freq"}
