Skip to content

Commit 67116d5

Browse files
authored
Merge pull request #1 from SLin417/sonia-inspirefetcher
Improve INSPIRE citekey handling and cleanup
2 parents dc61614 + 547d217 commit 67116d5

File tree

1 file changed

+87
-0
lines changed

1 file changed

+87
-0
lines changed

jablib/src/main/java/org/jabref/logic/importer/fetcher/INSPIREFetcher.java

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
import org.jabref.logic.importer.util.MediaTypes;
2323
import org.jabref.logic.layout.format.LatexToUnicodeFormatter;
2424
import org.jabref.logic.net.URLDownload;
25+
import org.jabref.model.entry.Author;
26+
import org.jabref.model.entry.AuthorList;
2527
import org.jabref.model.entry.BibEntry;
2628
import org.jabref.model.entry.field.StandardField;
2729
import org.jabref.model.entry.field.UnknownField;
@@ -78,6 +80,90 @@ public void doPostCleanup(BibEntry entry) {
7880
new FieldFormatterCleanup(StandardField.TITLE, new RemoveEnclosingBracesFormatter()).cleanup(entry);
7981

8082
new FieldFormatterCleanup(StandardField.TITLE, new LatexToUnicodeFormatter()).cleanup(entry);
83+
84+
// Check if the current citation key is bad (too long, contains URL, or illegal chars)
85+
String key = entry.getCitationKey().orElse("");
86+
if (isBadKey(key)) {
87+
// If so, generate a new citation key and set as citation key
88+
entry.setCitationKey(generateNewKey(entry));
89+
}
90+
}
91+
92+
String generateNewKey(BibEntry entry){
93+
// Generate a new citation key following INSPIRE texkey rules
94+
String newKey = "";
95+
Optional<String> authors = entry.getField(StandardField.AUTHOR);
96+
Optional<String> year = entry.getField(StandardField.YEAR);
97+
98+
// Parse authors into structured list; if absent, returns empty list
99+
List<Author> authorList = AuthorList.parse(authors.orElse("")).getAuthors();
100+
if (year.isPresent()){
101+
// If author info is available, use [first author's last name]:[year][other initials]
102+
if (authors.isPresent() && !authorList.isEmpty()){
103+
String firstLastName = authorList.getFirst().getNamePrefixAndFamilyName();
104+
StringBuilder suffix = new StringBuilder();
105+
106+
// Append the first letter of each author's last name
107+
for (Author author : authorList) {
108+
String lastName = author.getNamePrefixAndFamilyName();
109+
if (!lastName.isEmpty()) {
110+
suffix.append(lastName.charAt(0));
111+
}
112+
}
113+
114+
// Remove the first author's initial
115+
if (!suffix.isEmpty()) {
116+
suffix.deleteCharAt(0);
117+
}
118+
newKey = firstLastName + ":" + year.get() + suffix;
119+
}
120+
// If no author, but collaboration field exists, use [collaboration]:[year]
121+
else if (entry.getField(new UnknownField("collaboration")).isPresent()) {
122+
newKey = entry.getField(new UnknownField("collaboration")).get() + ":" + year.get();
123+
}
124+
// If no author/collaboration, but arXiv eprint exists, use arXiv:[eprint]
125+
else if (entry.getField(StandardField.EPRINT).isPresent()) {
126+
newKey = "arXiv:" + entry.getField(StandardField.EPRINT).get();
127+
}
128+
else {
129+
// TODO: warning for missing important information
130+
}
131+
}
132+
else {
133+
// If no year, fallback to arXiv if available
134+
if (entry.getField(StandardField.EPRINT).isPresent()) {
135+
newKey = "arXiv:" + entry.getField(StandardField.EPRINT).get();
136+
}
137+
else {
138+
// TODO: warning for missing important information
139+
}
140+
}
141+
return newKey;
142+
}
143+
144+
/**
145+
* Checks if the citation key is bad: contains illegal characters, is too long, or is a URL.
146+
*/
147+
boolean isBadKey(String key){
148+
char[] invalidChars = {'/', '\\', '*', '?', '"', '<', '>', '|', '#', '%'};
149+
for (char c : invalidChars) {
150+
if (key.contains(String.valueOf(c))) {
151+
return true;
152+
}
153+
}
154+
// Consider key bad if too long or is a URL
155+
return key.length() > 30 || key.startsWith("http://") || key.startsWith("https://");
156+
}
157+
158+
/**
159+
* If the BibEntry contains a 'texkeys' field, use it as the citation key and clear the field.
160+
*/
161+
void setTexkeys(BibEntry entry){
162+
Optional<String> texkeys = entry.getField(new UnknownField("texkeys"));
163+
if (texkeys.isPresent() && !texkeys.get().isBlank()) {
164+
entry.setCitationKey(texkeys.get());
165+
entry.clearField(new UnknownField("texkeys"));
166+
}
81167
}
82168

83169
@Override
@@ -110,6 +196,7 @@ public List<BibEntry> performSearch(@NonNull BibEntry entry) throws FetcherExcep
110196
try {
111197
URLDownload download = getUrlDownload(url);
112198
List<BibEntry> results = getParser().parseEntries(download.asInputStream());
199+
results.forEach(this::setTexkeys);
113200
results.forEach(this::doPostCleanup);
114201
return results;
115202
} catch (ParseException e) {

0 commit comments

Comments
 (0)