forked from RSFraLab/RSFraLab.github.io
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparsePubs.py
More file actions
82 lines (65 loc) · 2.76 KB
/
parsePubs.py
File metadata and controls
82 lines (65 loc) · 2.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from collections import defaultdict
# Slurp the DOI-augmented BibTeX export into memory as a single string.
# NOTE(review): this same path is written by the bibtexparser.dump() call at
# the bottom of this script, so it must already exist when this line runs;
# confirm the intended run order.
with open("publications/cfranken2024_doi.bib") as bib_source:
    file = bib_source.read()
def _clean_bib_value(raw):
    """Return a raw BibTeX field value as plain text.

    Surrounding whitespace, quotes and braces are stripped; every remaining
    brace, backslash and double quote is removed, and "---" / "--" are
    collapsed to a single "-".
    """
    text = raw.strip().strip('"').strip("{").strip("}")
    # Order matters: the two-character escapes must be handled before the
    # lone backslash is dropped, and "---" before "--".
    for old, new in (
        ("{", ""),
        ("}", ""),
        ("\\n", ""),
        ("\\'", ""),
        ("\\`", ""),
        ("\\o", "o"),
        ("\\e", "e"),
        ("\\", ""),
        ("---", "-"),
        ("--", "-"),
        ('"', ""),
    ):
        text = text.replace(old, new)
    return text.strip()


def _parse_bib_fields(lines):
    """Build a field-name -> cleaned-value mapping from the body lines of an entry.

    A line is treated as ``name = value`` only when the text before its first
    "=" is a single word; the value is everything after that first "=", so
    values that themselves contain "=" are kept whole. Any other line is
    taken to be the continuation of the previous field's value and is
    appended to it, separated by one space.
    """
    fields = defaultdict(str)
    last_key = None
    for line in lines:
        name, sep, value = line.partition("=")
        if sep and len(name.split()) == 1:
            last_key = name.strip().lower()
            fields[last_key] = _clean_bib_value(value)
        elif last_key is not None:
            extra = _clean_bib_value(line)
            if extra:
                fields[last_key] = f"{fields[last_key]} {extra}".strip()
    return fields


def parse_entry(entry, write=True, out_dir="publications"):
    """Convert one BibTeX entry into a ``.qmd`` page with YAML front matter.

    Args:
        entry: Text of a single BibTeX entry without its leading "@", i.e.
            starting with ``type{citekey,`` followed by one field per line.
        write: When false the entry is only parsed and no file is written.
        out_dir: Directory that receives ``<citekey>.qmd`` (any "/" in the
            cite key is dropped from the file name). The directory must
            already exist.

    Returns:
        None. An entry that contains only whitespace is ignored.

    Raises:
        ValueError: If the first non-blank line contains no "{", so no cite
            key can be extracted.
    """
    # Normalise every line: drop tabs, then surrounding whitespace, then the
    # trailing "," / "}" that terminate a field or the entry itself.
    stripped = (
        raw.replace("\t", "").strip().strip(",").strip("}").strip()
        for raw in entry.split("\n")
    )
    lines = [line for line in stripped if line]
    if not lines:
        return

    header_parts = lines[0].split("{")
    if len(header_parts) < 2:
        raise ValueError(f"BibTeX entry has no '{{' in its header: {lines[0]!r}")
    file_name = header_parts[1].replace("/", "")

    article_dict = _parse_bib_fields(lines[1:])
    # Fixed front-matter values, plus 'publication' as an alias of 'journal'.
    article_dict["type"] = "article"
    article_dict["toc"] = "false"
    article_dict["publication"] = article_dict["journal"]

    if not write:
        return

    props = ("title", "author", "type", "year", "publication", "doi", "materials", "toc")
    unquoted = ("toc", "year")  # emitted as bare YAML scalars, not strings
    with open(f"{out_dir}/{file_name}.qmd", "w", encoding="utf-8") as qmd:
        qmd.write("---\n")
        for prop in props:
            if prop in unquoted:
                qmd.write(f"{prop}: {article_dict[prop]}\n")
            else:
                qmd.write(f'{prop}: "{article_dict[prop]}"\n')
        qmd.write("---\n\n## Abstract\n\n")
        qmd.write(article_dict["abstract"])
# Turn every entry of the loaded BibTeX text into its own .qmd page.
# Entries are split on an "@" that starts a line rather than on every "@",
# so an "@" inside a field value (e.g. an e-mail address in an abstract)
# no longer cuts an entry in two. Text before the first entry is discarded.
for row in ("\n" + file).split("\n@")[1:]:
    # Skip chunks that contain nothing but whitespace.
    if row.strip():
        parse_entry(row)
import bibtexparser
import requests
def find_doi(title, author, timeout=10):
    """Look up a DOI for a work through the Crossref REST API.

    Args:
        title: Title of the work.
        author: Author string added to the query (the caller in this file
            passes the first author).
        timeout: Seconds to wait for Crossref before giving up, so that a
            stalled connection cannot hang the script indefinitely.

    Returns:
        The ``DOI`` of the first item Crossref returns for the query, or
        None when the response status is not 200, the result list is empty,
        or the first item carries no DOI.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    response = requests.get(
        "https://api.crossref.org/works",
        # Passing the query through ``params`` lets requests percent-encode
        # it; titles containing "&", "#" or "?" would otherwise truncate or
        # corrupt the query string. Only the first hit is used, so only one
        # row is requested.
        params={"query": f"{title} {author}", "rows": 1},
        timeout=timeout,
    )
    if response.status_code != 200:
        return None
    items = response.json().get("message", {}).get("items") or []
    if not items:
        return None
    return items[0].get("DOI")
# Load the source BibTeX file
with open('publications/cfranken2024.bib') as bibtex_file:
    bib_database = bibtexparser.load(bibtex_file)

# Fill in a DOI for every entry that does not have one yet
for entry in bib_database.entries:
    if 'doi' in entry:
        continue
    title = entry.get('title', '')
    if not title.strip():
        # Without a title the lookup would search on the author name alone
        # and store whatever work comes back first; leave the entry as is.
        continue
    author = entry.get('author', '').split(' and ')[0]  # First author
    doi = find_doi(title, author)
    if doi:
        entry['doi'] = doi

# Save the updated BibTeX file
with open('publications/cfranken2024_doi.bib', 'w') as bibtex_file:
    bibtexparser.dump(bib_database, bibtex_file)