|
22 | 22 | import org.jabref.logic.importer.util.MediaTypes; |
23 | 23 | import org.jabref.logic.layout.format.LatexToUnicodeFormatter; |
24 | 24 | import org.jabref.logic.net.URLDownload; |
| 25 | +import org.jabref.model.entry.Author; |
| 26 | +import org.jabref.model.entry.AuthorList; |
25 | 27 | import org.jabref.model.entry.BibEntry; |
26 | 28 | import org.jabref.model.entry.field.StandardField; |
27 | 29 | import org.jabref.model.entry.field.UnknownField; |
@@ -78,6 +80,90 @@ public void doPostCleanup(BibEntry entry) { |
78 | 80 | new FieldFormatterCleanup(StandardField.TITLE, new RemoveEnclosingBracesFormatter()).cleanup(entry); |
79 | 81 |
|
80 | 82 | new FieldFormatterCleanup(StandardField.TITLE, new LatexToUnicodeFormatter()).cleanup(entry); |
| 83 | + |
| 84 | + // Check if the current citation key is bad (too long, contains URL, or illegal chars) |
| 85 | + String key = entry.getCitationKey().orElse(""); |
| 86 | + if (isBadKey(key)) { |
| 87 | + // If so, generate a new citation key and set as citation key |
| 88 | + entry.setCitationKey(generateNewKey(entry)); |
| 89 | + } |
| 90 | + } |
| 91 | + |
| 92 | + String generateNewKey(BibEntry entry){ |
| 93 | + // Generate a new citation key following INSPIRE texkey rules |
| 94 | + String newKey = ""; |
| 95 | + Optional<String> authors = entry.getField(StandardField.AUTHOR); |
| 96 | + Optional<String> year = entry.getField(StandardField.YEAR); |
| 97 | + |
| 98 | + // Parse authors into structured list; if absent, returns empty list |
| 99 | + List<Author> authorList = AuthorList.parse(authors.orElse("")).getAuthors(); |
| 100 | + if (year.isPresent()){ |
| 101 | + // If author info is available, use [first author's last name]:[year][other initials] |
| 102 | + if (authors.isPresent() && !authorList.isEmpty()){ |
| 103 | + String firstLastName = authorList.getFirst().getNamePrefixAndFamilyName(); |
| 104 | + StringBuilder suffix = new StringBuilder(); |
| 105 | + |
| 106 | + // Append the first letter of each author's last name |
| 107 | + for (Author author : authorList) { |
| 108 | + String lastName = author.getNamePrefixAndFamilyName(); |
| 109 | + if (!lastName.isEmpty()) { |
| 110 | + suffix.append(lastName.charAt(0)); |
| 111 | + } |
| 112 | + } |
| 113 | + |
| 114 | + // Remove the first author's initial |
| 115 | + if (!suffix.isEmpty()) { |
| 116 | + suffix.deleteCharAt(0); |
| 117 | + } |
| 118 | + newKey = firstLastName + ":" + year.get() + suffix; |
| 119 | + } |
| 120 | + // If no author, but collaboration field exists, use [collaboration]:[year] |
| 121 | + else if (entry.getField(new UnknownField("collaboration")).isPresent()) { |
| 122 | + newKey = entry.getField(new UnknownField("collaboration")).get() + ":" + year.get(); |
| 123 | + } |
| 124 | + // If no author/collaboration, but arXiv eprint exists, use arXiv:[eprint] |
| 125 | + else if (entry.getField(StandardField.EPRINT).isPresent()) { |
| 126 | + newKey = "arXiv:" + entry.getField(StandardField.EPRINT).get(); |
| 127 | + } |
| 128 | + else { |
| 129 | + // TODO: warning for missing important information |
| 130 | + } |
| 131 | + } |
| 132 | + else { |
| 133 | + // If no year, fallback to arXiv if available |
| 134 | + if (entry.getField(StandardField.EPRINT).isPresent()) { |
| 135 | + newKey = "arXiv:" + entry.getField(StandardField.EPRINT).get(); |
| 136 | + } |
| 137 | + else { |
| 138 | + // TODO: warning for missing important information |
| 139 | + } |
| 140 | + } |
| 141 | + return newKey; |
| 142 | + } |
| 143 | + |
| 144 | + /** |
| 145 | + * Checks if the citation key is bad: contains illegal characters, is too long, or is a URL. |
| 146 | + */ |
| 147 | + boolean isBadKey(String key){ |
| 148 | + char[] invalidChars = {'/', '\\', '*', '?', '"', '<', '>', '|', '#', '%'}; |
| 149 | + for (char c : invalidChars) { |
| 150 | + if (key.contains(String.valueOf(c))) { |
| 151 | + return true; |
| 152 | + } |
| 153 | + } |
| 154 | + // Consider key bad if too long or is a URL |
| 155 | + return key.length() > 30 || key.startsWith("http://") || key.startsWith("https://"); |
| 156 | + } |
| 157 | + |
| 158 | + /** |
| 159 | + * If the BibEntry contains a 'texkeys' field, use it as the citation key and clear the field. |
| 160 | + */ |
| 161 | + void setTexkeys(BibEntry entry){ |
| 162 | + Optional<String> texkeys = entry.getField(new UnknownField("texkeys")); |
| 163 | + if (texkeys.isPresent() && !texkeys.get().isBlank()) { |
| 164 | + entry.setCitationKey(texkeys.get()); |
| 165 | + entry.clearField(new UnknownField("texkeys")); |
| 166 | + } |
81 | 167 | } |
82 | 168 |
|
83 | 169 | @Override |
@@ -110,6 +196,7 @@ public List<BibEntry> performSearch(@NonNull BibEntry entry) throws FetcherExcep |
110 | 196 | try { |
111 | 197 | URLDownload download = getUrlDownload(url); |
112 | 198 | List<BibEntry> results = getParser().parseEntries(download.asInputStream()); |
| 199 | + results.forEach(this::setTexkeys); |
113 | 200 | results.forEach(this::doPostCleanup); |
114 | 201 | return results; |
115 | 202 | } catch (ParseException e) { |
|
0 commit comments