Skip to content

Commit c9d86c7

Browse files
committed
remove unnecessary items
1 parent 1066589 commit c9d86c7

File tree

1 file changed

+19 -7 lines changed

1 file changed

+19 -7 lines changed

python/graphy/utils/data_extractor.py

Lines changed: 19 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,10 @@ def write_csv(file_path: str, data: list, headers: list = None):
4141
for row in data:
4242
headers.update(row.keys())
4343
if headers:
44+
if "abstract" in headers:
45+
headers.remove("abstract")
46+
if "primary_class" in headers:
47+
headers.remove("primary_class")
4448
if "id" in headers:
4549
headers.remove("id") # Remove "id" if it's in the headers
4650
headers = ["id"] + sorted(
@@ -49,21 +53,29 @@ def write_csv(file_path: str, data: list, headers: list = None):
4953
else:
5054
headers = sorted(headers) # Just sort if "id" is not present
5155
header_set = set(headers)
52-
for row in data:
53-
missing_fields = header_set - row.keys() # Find missing fields in the row
54-
if missing_fields:
55-
# Add missing fields with empty values in one go
56-
row.update({field: "" for field in missing_fields})
57-
56+
else:
57+
header_set = None
5858
if len(data) > 0:
5959
with open(file_path, "w", newline="") as file:
6060
writer = csv.DictWriter(
6161
file, fieldnames=headers, delimiter=DEFAULT_DELIMITER
6262
)
6363
writer.writeheader()
6464
for row in data:
65+
if header_set:
66+
missing_fields = (
67+
header_set - row.keys()
68+
) # Find missing fields in the row
69+
redundant_fields = row.keys() - header_set
70+
if missing_fields:
71+
# Add missing fields with empty values in one go
72+
row.update({field: "" for field in missing_fields})
73+
for field in redundant_fields:
74+
del row[field] # Remove field from the row
75+
if "authors" in row and "author" in header_set:
76+
if len(row["authors"]) > 0:
77+
row["author"] = row["authors"][0]
6578
try:
66-
# Exclude the 'embedding' field from being written to the CSV
6779
writer.writerow(row)
6880
except Exception as e:
6981
print(f"Error: {e}")

0 commit comments

Comments (0)