Skip to content

Commit 760d10e

Browse files
authored
Merge pull request #24 from DenisaCG/getContents
Add content retrieval logic
2 parents 11585dc + 50f3a12 commit 760d10e

File tree

7 files changed

+327
-39
lines changed

7 files changed

+327
-39
lines changed

jupyter_drives/handlers.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,17 +69,17 @@ def initialize(self, logger: logging.Logger, manager: JupyterDrivesManager):
6969
return super().initialize(logger, manager)
7070

7171
@tornado.web.authenticated
72-
async def get(self, path: str = "", drive: str = ""):
72+
async def get(self, drive: str = "", path: str = ""):
7373
result = await self._manager.get_contents(drive, path)
7474
self.finish(result)
7575

7676
@tornado.web.authenticated
77-
async def post(self, path: str = "", drive: str = ""):
77+
async def post(self, drive: str = "", path: str = ""):
7878
result = await self._manager.new_file(drive, path)
7979
self.finish(result)
8080

8181
@tornado.web.authenticated
82-
async def patch(self, path: str = "", drive: str = ""):
82+
async def patch(self, drive: str = "", path: str = ""):
8383
body = self.get_json_body()
8484
result = await self._manager.rename_file(drive, path, **body)
8585
self.finish(result)

jupyter_drives/manager.py

Lines changed: 79 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,17 @@
33
import logging
44
from typing import Dict, List, Optional, Tuple, Union, Any
55

6+
import os
67
import tornado
78
import httpx
89
import traitlets
10+
import base64
911
from jupyter_server.utils import url_path_join
1012

1113
import obstore as obs
1214
from libcloud.storage.types import Provider
1315
from libcloud.storage.providers import get_driver
16+
import pyarrow
1417

1518
from .log import get_logger
1619
from .base import DrivesConfig
@@ -86,7 +89,7 @@ async def list_drives(self):
8689
"name": result.name,
8790
"region": self._config.region_name if self._config.region_name is not None else "eu-north-1",
8891
"creation_date": result.extra["creation_date"],
89-
"mounted": "true" if result.name not in self._content_managers else "false",
92+
"mounted": False if result.name not in self._content_managers else True,
9093
"provider": self._config.provider
9194
}
9295
)
@@ -153,14 +156,86 @@ async def unmount_drive(self, drive_name: str):
153156

154157
return
155158

156-
async def get_contents(self, drive_name, path, **kwargs):
159+
async def get_contents(self, drive_name, path):
157160
"""Get contents of a file or directory.
158161
159162
Args:
160163
drive_name: name of drive to get the contents of
161-
path: path to file or directory
164+
path: path to file or directory (empty string for root listing)
162165
"""
163-
print('Get contents function called.')
166+
if path == '/':
167+
path = ''
168+
try :
169+
data = []
170+
isDir = False
171+
emptyDir = True # assume we are dealing with an empty directory
172+
173+
# using Arrow lists as they are recommended for large results
174+
# stream will be an async iterable of RecordBatch
175+
stream = obs.list(self._content_managers[drive_name], path, chunk_size=100, return_arrow=True)
176+
async for batch in stream:
177+
# if content exists we are dealing with a directory
178+
if isDir is False and batch:
179+
isDir = True
180+
emptyDir = False
181+
182+
contents_list = pyarrow.record_batch(batch).to_pylist()
183+
for object in contents_list:
184+
data.append({
185+
"path": object["path"],
186+
"last_modified": object["last_modified"].isoformat(),
187+
"size": object["size"],
188+
})
189+
190+
# no listing results for a non-root path: try to read it as a file (it may still turn out to be an empty directory)
191+
if isDir is False and path != '':
192+
content = b""
193+
# retrieve contents of object
194+
obj = await obs.get_async(self._content_managers[drive_name], path)
195+
stream = obj.stream(min_chunk_size=5 * 1024 * 1024) # 5MB sized chunks
196+
async for buf in stream:
197+
# if content exists we are dealing with a file
198+
if emptyDir is True and buf:
199+
emptyDir = False
200+
content += buf
201+
202+
# retrieve metadata of object
203+
metadata = await obs.head_async(self._content_managers[drive_name], path)
204+
205+
# for certain media type files, extracted content needs to be read as a byte array and encoded to base64 to be viewable in JupyterLab
206+
# the following extensions correspond to a base64 file format or are of type PDF
207+
ext = os.path.splitext(path)[1]
208+
if ext == '.pdf' or ext == '.svg' or ext == '.tif' or ext == '.tiff' or ext == '.jpg' or ext == '.jpeg' or ext == '.gif' or ext == '.png' or ext == '.bmp' or ext == '.webp':
209+
processed_content = base64.b64encode(content).decode("utf-8")
210+
else:
211+
processed_content = content.decode("utf-8")
212+
213+
data = {
214+
"path": path,
215+
"content": processed_content,
216+
"last_modified": metadata["last_modified"].isoformat(),
217+
"size": metadata["size"]
218+
}
219+
220+
# dealing with the case of an empty directory, making sure it is not an empty file
221+
# TODO: find a better way to check
222+
if emptyDir is True:
223+
ext_list = ['.R', '.bmp', '.csv', '.gif', '.html', '.ipynb', '.jl', '.jpeg', '.jpg', '.json', '.jsonl', '.md', '.ndjson', '.pdf', '.png', '.py', '.svg', '.tif', '.tiff', '.tsv', '.txt', '.webp', '.yaml', '.yml']
224+
object_name = os.path.basename(path)
225+
# if object doesn't contain . or doesn't end in one of the registered extensions
226+
if object_name.find('.') == -1 or ext_list.count(os.path.splitext(object_name)[1]) == 0:
227+
data = []
228+
229+
response = {
230+
"data": data
231+
}
232+
except Exception as e:
233+
raise tornado.web.HTTPError(
234+
status_code= httpx.codes.BAD_REQUEST,
235+
reason=f"The following error occured when retrieving the contents: {e}",
236+
)
237+
238+
return response
164239

165240
async def new_file(self, drive_name, path, **kwargs):
166241
"""Create a new file or directory at the given path.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ classifiers = [
2323
]
2424
dependencies = [
2525
"obstore>=0.2.0,<0.3",
26+
"pyarrow>=18.0.0,<19.0.0",
2627
"jupyter_server>=2.14.2,<3",
2728
"s3contents>=0.11.1,<0.12.0",
2829
"apache-libcloud>=3.8.0, <4",

src/contents.ts

Lines changed: 79 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,8 @@
1-
// Copyright (c) Jupyter Development Team.
2-
// Distributed under the terms of the Modified BSD License.
3-
1+
import { JupyterFrontEnd } from '@jupyterlab/application';
42
import { Signal, ISignal } from '@lumino/signaling';
53
import { Contents, ServerConnection } from '@jupyterlab/services';
6-
import { PathExt } from '@jupyterlab/coreutils';
7-
import { IDriveInfo } from './token';
8-
import { mountDrive } from './requests';
4+
import { IDriveInfo, IRegisteredFileTypes } from './token';
5+
import { getContents, mountDrive } from './requests';
96

107
let data: Contents.IModel = {
118
name: '',
@@ -120,6 +117,20 @@ export class Drive implements Contents.IDrive {
120117
return this._serverSettings;
121118
}
122119

120+
/**
121+
* The registered file types
122+
*/
123+
get registeredFileTypes(): IRegisteredFileTypes {
124+
return this._registeredFileTypes;
125+
}
126+
127+
/**
128+
* The registered file types
129+
*/
130+
set registeredFileTypes(fileTypes: IRegisteredFileTypes) {
131+
this._registeredFileTypes = fileTypes;
132+
}
133+
123134
/**
124135
* A signal emitted when a file operation takes place.
125136
*/
@@ -185,40 +196,41 @@ export class Drive implements Contents.IDrive {
185196
): Promise<Contents.IModel> {
186197
let relativePath = '';
187198
if (localPath !== '') {
188-
if (localPath.includes(this.name)) {
189-
relativePath = localPath.split(this.name + '/')[1];
190-
} else {
191-
relativePath = localPath;
192-
}
193-
194199
// extract current drive name
195-
const currentDrive = this.drivesList.filter(x => x.name === localPath)[0];
200+
const currentDrive = this._drivesList.filter(
201+
x =>
202+
x.name ===
203+
(localPath.indexOf('/') !== -1
204+
? localPath.substring(0, localPath.indexOf('/'))
205+
: localPath)
206+
)[0];
207+
196208
// when accessed the first time, mount drive
197-
if (!currentDrive.mounted) {
209+
if (currentDrive.mounted === false) {
198210
try {
199211
await mountDrive(localPath, {
200212
provider: currentDrive.provider,
201213
region: currentDrive.region
202214
});
203-
currentDrive.mounted = true;
215+
this._drivesList.filter(x => x.name === localPath)[0].mounted = true;
204216
} catch (e) {
205217
console.log(e);
206218
}
207219
}
208220

209-
data = {
210-
name: PathExt.basename(localPath),
211-
path: PathExt.basename(localPath),
212-
last_modified: '',
213-
created: '',
214-
content: [],
215-
format: 'json',
216-
mimetype: '',
217-
size: undefined,
218-
writable: true,
219-
type: 'directory'
220-
};
221+
// eliminate drive name from path
222+
relativePath =
223+
localPath.indexOf('/') !== -1
224+
? localPath.substring(localPath.indexOf('/') + 1)
225+
: '';
226+
227+
data = await getContents(currentDrive.name, {
228+
path: relativePath,
229+
registeredFileTypes: this._registeredFileTypes
230+
});
221231
} else {
232+
// retrieving list of contents from root
233+
// in our case: list available drives
222234
const drivesList: Contents.IModel[] = [];
223235
for (const drive of this._drivesList) {
224236
drivesList.push({
@@ -248,7 +260,6 @@ export class Drive implements Contents.IDrive {
248260
type: 'directory'
249261
};
250262
}
251-
console.log('GET: ', relativePath);
252263

253264
Contents.validateContentsModel(data);
254265
return data;
@@ -558,7 +569,11 @@ export class Drive implements Contents.IDrive {
558569
* checkpoint is created.
559570
*/
560571
createCheckpoint(path: string): Promise<Contents.ICheckpointModel> {
561-
return Promise.reject('Repository is read only');
572+
const emptyCheckpoint: Contents.ICheckpointModel = {
573+
id: '',
574+
last_modified: ''
575+
};
576+
return Promise.resolve(emptyCheckpoint);
562577
}
563578

564579
/**
@@ -599,6 +614,40 @@ export class Drive implements Contents.IDrive {
599614
return Promise.reject('Read only');
600615
}
601616

617+
/**
618+
* Get all registered file types and store them accordingly with their file
619+
* extension (e.g.: .txt, .pdf, .jpeg), file mimetype (e.g.: text/plain, application/pdf)
620+
* and file format (e.g.: base64, text).
621+
*
622+
* @param app
623+
*/
624+
getRegisteredFileTypes(app: JupyterFrontEnd) {
625+
// gets called when instantiating the toolbar
626+
const registeredFileTypes = app.docRegistry.fileTypes();
627+
628+
for (const fileType of registeredFileTypes) {
629+
// check if we are dealing with a directory
630+
if (fileType.extensions.length === 0) {
631+
this._registeredFileTypes[''] = {
632+
fileType: 'directory',
633+
fileFormat: 'json',
634+
fileMimeTypes: ['text/directory']
635+
};
636+
}
637+
638+
// store the mimetype and fileformat for each file extension
639+
fileType.extensions.forEach(extension => {
640+
if (!this._registeredFileTypes[extension]) {
641+
this._registeredFileTypes[extension] = {
642+
fileType: fileType.name,
643+
fileMimeTypes: [...fileType.mimeTypes],
644+
fileFormat: fileType.fileFormat ?? ''
645+
};
646+
}
647+
});
648+
}
649+
}
650+
602651
/**
603652
* Get a REST url for a file given a path.
604653
*/
@@ -619,6 +668,7 @@ export class Drive implements Contents.IDrive {
619668
private _fileChanged = new Signal<this, Contents.IChangedArgs>(this);
620669
private _isDisposed: boolean = false;
621670
private _disposed = new Signal<this, void>(this);
671+
private _registeredFileTypes: IRegisteredFileTypes = {};
622672
}
623673

624674
export namespace Drive {

src/index.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,8 @@ const drivesListProvider: JupyterFrontEndPlugin<IDriveInfo[]> = {
172172
mounted: drive.mounted
173173
});
174174
}
175-
} catch {
176-
console.log('Failed loading available drives list.');
175+
} catch (error) {
176+
console.log('Failed loading available drives list, with error: ', error);
177177
}
178178
return drives;
179179
}
@@ -224,6 +224,9 @@ const driveFileBrowser: JupyterFrontEndPlugin<void> = {
224224

225225
app.serviceManager.contents.addDrive(drive);
226226

227+
// get registered file types
228+
drive.getRegisteredFileTypes(app);
229+
227230
// Manually restore and load the drive file browser.
228231
const driveBrowser = fileBrowserFactory.createFileBrowser('drivebrowser', {
229232
auto: false,

0 commit comments

Comments
 (0)