Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
BigData Praktikum
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Franziska Roepke
BigData Praktikum
Merge requests
!1
Couldn't fetch the linked file.
Dev
Code
Review changes
Check out branch
Download
Patches
Plain diff
Merged
Dev
dev
into
main
Overview
0
Commits
8
Pipelines
0
Changes
1
Merged
Paul Kuehnel
requested to merge
dev
into
main
1 year ago
Overview
0
Commits
8
Pipelines
0
Changes
1
Expand
0
0
Merge request reports
Viewing commit
d1ee87c9
Prev
Next
Show latest version
1 file
+
240
−
0
Inline
Compare changes
Side-by-side
Inline
Show whitespace changes
Show one file at a time
d1ee87c9
first attempt to download loom files via api
· d1ee87c9
franzi - schranzi
authored
1 year ago
API_access_local_backup.ipynb
0 → 100644
+
240
−
0
Options
%% Cell type:markdown id: tags:
## Erste Spielereien
### Geht das mit der API für uns ?
Probiere mal das Python-Package aus, das es dafür gibt:
https://pypi.org/project/hca/
Okay, Fehlschlag: Das Package wird nicht mehr verwendet. Schade.
%% Cell type:code id: tags:
```
python
import
requests
import
os
from
tqdm
import
tqdm
import
json
import
pandas
as
pd
from
tabulate
import
tabulate
```
%% Cell type:code id: tags:
```
python
# Download the files selected by index from a file list (e.g. the output of get_file_list).
def download_file(index, output_path, files_list):
    """Download the selected entries of ``files_list`` to ``output_path``.

    Args:
        index: Iterable of positions into ``files_list`` to download.
        output_path: Download destination. If it is an existing directory,
            each file is stored inside it under the entry's ``'fileName'``;
            otherwise it is used as the target file path itself.
        files_list: Sequence of dicts providing ``'url'`` and, when
            ``output_path`` is a directory, ``'fileName'``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Passing a directory straight to open() raises IsADirectoryError, so a
    # directory destination is combined with each file's own name instead.
    into_directory = os.path.isdir(output_path)

    for i in index:
        entry = files_list[i]
        # Work around https://github.com/DataBiosphere/azul/issues/2908
        url = entry['url'].replace('/fetch', '')

        if into_directory:
            dest_path = os.path.join(output_path, entry['fileName'])
        else:
            dest_path = output_path

        # The context manager releases the streamed connection even when the
        # download fails part-way.
        with requests.get(url, stream=True) as response:
            response.raise_for_status()

            total = int(response.headers.get('content-length', 0))
            print(f'Downloading to: {dest_path}', flush=True)

            with open(dest_path, 'wb') as f:
                with tqdm(total=total, unit='B', unit_scale=True, unit_divisor=1024) as bar:
                    for chunk in response.iter_content(chunk_size=1024):
                        size = f.write(chunk)
                        bar.update(size)
```
%% Cell type:code id: tags:
```
python
# Catalog name sent as the 'catalog' query parameter, and the UUID of the
# project that is looked up.
catalog = 'dcp26'
project_uuid = '4a95101c-9ffc-4f30-a809-f04518a23803'

# Azul index endpoint for that single project.
endpoint_url = 'https://service.azul.data.humancellatlas.org/index/projects/' + project_uuid

# Base directory that file names are joined onto to form download destinations.
save_location = '/home/fran/Documents/AAMasterDataScience/BigDataPraktikum/data'
```
%% Cell type:code id: tags:
```
python
# Example: request one project and walk its matrix trees, collecting each
# file URL once. The actual download call below is commented out.
response = requests.get(endpoint_url, params={'catalog': catalog,})
response.raise_for_status()

response_json = response.json()
project = response_json['projects'][0]

# URLs already seen, so a file listed under both keys is handled only once.
file_urls = set()
for key in ('matrices', 'contributedAnalyses'):
    tree = project[key]
    # NOTE(review): iterate_matrices_tree is not defined in the visible part of
    # this notebook — confirm it exists in the kernel before running this cell.
    for path, file_info in iterate_matrices_tree(tree):
        url = file_info['url']
        if url not in file_urls:
            dest_path = os.path.join(save_location, file_info['name'])
            # TODO uncomment the following line if you really want to download data:
            # NOTE(review): this two-argument call does not match the
            # download_file(index, output_path, files_list) defined in this
            # notebook — adapt it before uncommenting.
            # download_file(url, dest_path)
            file_urls.add(url)
print('Downloads Complete.')
```
%% Output
Downloads Complete.
%% Cell type:code id: tags:
```
python
# Query the Azul file index for file metadata (loom files by default).
def get_files_metadata(params=None):
    """Fetch file metadata from the Azul ``/index/files`` endpoint.

    Args:
        params: Optional dict of query parameters. Its entries override the
            defaults: catalog ``dcp26``, file formats ``loom``/``loom.gz``,
            ``size`` 10, sorted by ``lastModifiedDate`` in ``asc`` order.

    Returns:
        The decoded JSON response when the status code is 200; otherwise
        ``None``, after the status code has been printed.
    """
    query = {
        'catalog': 'dcp26',
        'filters': '{"fileFormat": {"is": ["loom", "loom.gz"]}}',
        'size': 10,
        'sort': 'lastModifiedDate',
        'order': 'asc',
    }
    if params:
        # Caller-supplied values take precedence over the defaults.
        query.update(params)

    url = 'https://service.azul.data.humancellatlas.org/index/files'
    response = requests.get(url, params=query)

    if response.status_code != 200:
        # Best-effort: report the failure and signal it with None.
        print("Error:", response.status_code)
        return None

    return response.json()
```
%% Cell type:code id: tags:
```
python
def get_file_list(response_json):
    """Flatten a file-index response into one indexed record per file.

    Every file of every entry in ``response_json['hits']`` becomes a dict with
    the keys ``index``, ``fileName``, ``size``, ``version``,
    ``projectShortname``, ``projectId``, ``entryId``, ``contentDescription``
    and ``url``. ``index`` counts the files across all hits, starting at 0.
    """
    # Lazily pair each file with the hit it belongs to, in response order.
    hit_file_pairs = (
        (hit, entry)
        for hit in response_json['hits']
        for entry in hit['files']
    )

    return [
        {
            'index': position,
            'fileName': entry['name'],
            'size': entry['size'],
            'version': entry['version'],
            # Only the first project and the first value of each list are kept.
            'projectShortname': hit['projects'][0]['projectShortname'][0],
            'projectId': hit['projects'][0]['projectId'][0],
            'entryId': hit['entryId'],
            'contentDescription': entry['contentDescription'][0],
            'url': entry['url'],
        }
        for position, (hit, entry) in enumerate(hit_file_pairs)
    ]
def print_file_table(response_json):
    """Print the given file records as a ``fancy_grid`` table.

    Despite its name, ``response_json`` is used directly as the list of file
    records: each item must provide the keys ``index``, ``fileName``, ``size``,
    ``version``, ``projectShortname``, ``projectId``, ``entryId`` and
    ``contentDescription``.
    """
    # (record key, column title) pairs, in display order.
    columns = (
        ('index', "Index"),
        ('fileName', "File Name"),
        ('size', "Size"),
        ('version', "Version"),
        ('projectShortname', "Project (shortname)"),
        ('projectId', "ProjectID"),
        ('entryId', "EntryId"),
        ('contentDescription', "Content Description"),
    )
    titles = [title for _, title in columns]
    rows = [[record[key] for key, _ in columns] for record in response_json]

    print(tabulate(rows, titles, tablefmt='fancy_grid'))
```
%% Cell type:code id: tags:
```
python
# Example workflow: fetch metadata, list the files, download a selection.

# Fetch the file metadata from the API.
antwort = get_files_metadata()

# Flatten it into a list of file records and print it as a table.
files_list = get_file_list(antwort)
print_file_table(files_list)

# Specify which file(s) to download: a list of positions in files_list
# (the values shown in the table's Index column).
index = [6]
# NOTE(review): catalog is assigned here but not passed to any call in this cell.
catalog = 'dcp26'
# Destination handed to download_file as its output_path argument.
save_location = '/home/fran/Documents/AAMasterDataScience/BigDataPraktikum/data'

download_file(index, save_location, files_list)
```
%% Output
╒═════════╤═══════════════════════════════════════════╤════════════╤═════════════════════════════╤════════════════════════════╤══════════════════════════════════════╤══════════════════════════════════════╤═══════════════════════╕
│ Index │ File Name │ Size │ Version │ Project (shortname) │ ProjectID │ EntryId │ Content Description │
╞═════════╪═══════════════════════════════════════════╪════════════╪═════════════════════════════╪════════════════════════════╪══════════════════════════════════════╪══════════════════════════════════════╪═══════════════════════╡
│ 0 │ 098cc66a-d806-42db-a1c8-fa99a0317d7c.loom │ 854692581 │ 2021-02-03T19:43:20.000000Z │ HumanTissueTcellActivation │ 4a95101c-9ffc-4f30-a809-f04518a23803 │ 131ea511-25f7-5801-993f-bfa25f8ca68d │ Count Matrix │
├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤
│ 1 │ 294fe5d9-c1e8-4670-80d3-4c2b0a5e33c1.loom │ 1530326527 │ 2021-02-03T19:51:58.000000Z │ HumanTissueTcellActivation │ 4a95101c-9ffc-4f30-a809-f04518a23803 │ 7848d80b-6b1d-56b5-b19a-9639e3c4efbe │ Count Matrix │
├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤
│ 2 │ d6536459-ab4e-4954-a0ce-5e6d07670039.loom │ 938504115 │ 2021-02-03T19:44:43.000000Z │ HumanTissueTcellActivation │ 4a95101c-9ffc-4f30-a809-f04518a23803 │ b98cfaac-64f5-59f5-b42e-209186812c19 │ Count Matrix │
├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤
│ 3 │ a040dae6-e0b1-49cf-a9ee-9793d5ad7d9c.loom │ 1478984890 │ 2021-02-03T19:49:15.000000Z │ HumanTissueTcellActivation │ 4a95101c-9ffc-4f30-a809-f04518a23803 │ c7b6470c-e2f0-5141-a8a2-11eb0984689a │ Count Matrix │
├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤
│ 4 │ t-cell-activation-human-lung-10XV2.loom │ 395054566 │ 2021-02-10T18:04:33.000000Z │ HumanTissueTcellActivation │ 4a95101c-9ffc-4f30-a809-f04518a23803 │ d0b95f2c-98ae-582b-84f4-e2bd0c5a0adb │ Count Matrix │
├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤
│ 5 │ 0f14c412-5014-4ac0-9a71-858b2f047777.loom │ 423142737 │ 2021-02-04T15:18:49.000000Z │ KidneySingleCellAtlas │ abe1a013-af7a-45ed-8c26-f3793c24a1f4 │ 2b9f7c89-d1c2-53ef-a769-80fec2f7d9e6 │ Count Matrix │
├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤
│ 6 │ 37cad11b-c8c9-4d1f-b715-498b0f8d4b35.loom │ 1066947865 │ 2021-02-04T15:49:34.000000Z │ KidneySingleCellAtlas │ abe1a013-af7a-45ed-8c26-f3793c24a1f4 │ 32c69d68-1792-53af-9f42-0e97c9afc94b │ Count Matrix │
├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤
│ 7 │ dc31f31d-ab56-4025-9834-99be638a2d50.loom │ 745509487 │ 2021-02-04T15:34:25.000000Z │ KidneySingleCellAtlas │ abe1a013-af7a-45ed-8c26-f3793c24a1f4 │ 38a0ef48-9df1-5fef-8eb6-b32fbb67aabd │ Count Matrix │
├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤
│ 8 │ afd0ea55-e710-4b46-bb05-2423e491b6f5.loom │ 698042665 │ 2021-02-04T15:34:36.000000Z │ KidneySingleCellAtlas │ abe1a013-af7a-45ed-8c26-f3793c24a1f4 │ 3cd78fb7-b7bc-5ab1-b122-47585f0023d4 │ Count Matrix │
├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤
│ 9 │ b3ce1085-08dc-42ff-a609-6968315327a8.loom │ 425012253 │ 2021-02-04T15:30:01.000000Z │ KidneySingleCellAtlas │ abe1a013-af7a-45ed-8c26-f3793c24a1f4 │ 44175006-91f3-5d95-9a08-b33e08ed1ae3 │ Count Matrix │
╘═════════╧═══════════════════════════════════════════╧════════════╧═════════════════════════════╧════════════════════════════╧══════════════════════════════════════╧══════════════════════════════════════╧═══════════════════════╛
Downloading to: /home/fran/Documents/AAMasterDataScience/BigDataPraktikum/data
---------------------------------------------------------------------------
IsADirectoryError Traceback (most recent call last)
Cell In[75], line 12
9 catalog = 'dcp26'
10 save_location = '/home/fran/Documents/AAMasterDataScience/BigDataPraktikum/data'
---> 12 download_file(index, save_location,files_list)
Cell In[68], line 16, in download_file(index, output_path, files_list)
13 total = int(response.headers.get('content-length', 0))
14 print(f'Downloading to: {output_path}', flush=True)
---> 16 with open(output_path, 'wb') as f:
17 with tqdm(total=total, unit='B', unit_scale=True, unit_divisor=1024) as bar:
18 for chunk in response.iter_content(chunk_size=1024):
File ~/.local/lib/python3.8/site-packages/IPython/core/interactiveshell.py:284, in _modified_open(file,
*args, *
*
kwargs)
277 if file in {0, 1, 2}:
278 raise ValueError(
279 f"IPython won't let you open fd={file} by default "
280 "as it is likely to crash IPython. If you know what you are doing, "
281 "you can use builtins' open."
282 )
--> 284 return io_open(file,
*args, *
*
kwargs)
IsADirectoryError: [Errno 21] Is a directory: '/home/fran/Documents/AAMasterDataScience/BigDataPraktikum/data'
%% Cell type:markdown id: tags:
## Weiterführende Notizen
https://www.askpython.com/python/examples/pull-data-from-an-api
%% Cell type:markdown id: tags:
Loom Dateien:
Umgang mit LoomPy:
https://linnarssonlab.org/loompy/apiwalkthrough/index.html
Loading