file in wrong directory

33b40bc9 · Franziska Roepke · c8140241 · c8140241
Commit 33b40bc9 authored 1 year ago by Franziska Roepke
--- a/API_access_local_backup.ipynb
+++ b/API_access_local_backup.ipynb
-{
- "cells": [
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Erste Spielereien \n",
-    "\n",
-    "### Geht das mit der API für uns?\n",
-    "Probiere mal das Python-Package, das es gibt ...\n",
-    "https://pypi.org/project/hca/\n",
-    "\n",
-    "Okay, fail: Das wird nicht mehr verwendet. Schade."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import requests\n",
-    "import os\n",
-    "from tqdm import tqdm\n",
-    "import json\n",
-    "import pandas as pd\n",
-    "from tabulate import tabulate"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 68,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# example function, downloading a single file which was specified before \n",
-    "# modified! \n",
-    "# TODO make this function work. Add file name to output_path (see example code)\n",
-    "\n",
-    "def download_file(index, output_path, files_list):\n",
-    "    for i in index: \n",
-    "\n",
-    "        url_tp = files_list[i]['url']\n",
-    "        url = url_tp.replace('/fetch', '')  # Work around https://github.com/DataBiosphere/azul/issues/2908\n",
-    "    \n",
-    "        response = requests.get(url, stream=True)\n",
-    "        response.raise_for_status()\n",
-    "    \n",
-    "        total = int(response.headers.get('content-length', 0))\n",
-    "        print(f'Downloading to: {output_path}', flush=True)\n",
-    "    \n",
-    "        with open(output_path, 'wb') as f:\n",
-    "            with tqdm(total=total, unit='B', unit_scale=True, unit_divisor=1024) as bar:\n",
-    "                for chunk in response.iter_content(chunk_size=1024):\n",
-    "                    size = f.write(chunk)\n",
-    "                    bar.update(size)\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "project_uuid = '4a95101c-9ffc-4f30-a809-f04518a23803'\n",
-    "catalog = 'dcp26'\n",
-    "endpoint_url = f'https://service.azul.data.humancellatlas.org/index/projects/{project_uuid}'\n",
-    "\n",
-    "save_location = '/home/fran/Documents/AAMasterDataScience/BigDataPraktikum/data'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 57,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Downloads Complete.\n"
-     ]
-    }
-   ],
-   "source": [
-    "# example downloading of a project\n",
-    "\n",
-    "response = requests.get(endpoint_url, params={'catalog': catalog,})\n",
-    "response.raise_for_status()\n",
-    "response_json = response.json()\n",
-    "project = response_json['projects'][0]\n",
-    "\n",
-    "file_urls = set()\n",
-    "for key in ('matrices', 'contributedAnalyses'):\n",
-    "    tree = project[key]\n",
-    "    for path, file_info in iterate_matrices_tree(tree):\n",
-    "        url = file_info['url']\n",
-    "        if url not in file_urls:\n",
-    "            dest_path = os.path.join(save_location, file_info['name'])\n",
-    "            # TODO uncomment the following line if you really want to download data: \n",
-    "            # download_file(url, dest_path)\n",
-    "            file_urls.add(url)\n",
-    "print('Downloads Complete.')\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 72,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# function gets meta data of loom files\n",
-    "# TODO add default params and the ability to pass params to the function\n",
-    "def get_files_metadata():\n",
-    "    params = {\n",
-    "        'catalog': 'dcp26',\n",
-    "        'filters': '{\"fileFormat\": {\"is\": [\"loom\",\"loom.gz\"]}}',\n",
-    "        'size': 10,\n",
-    "        'sort': 'lastModifiedDate',\n",
-    "        'order': 'asc'\n",
-    "    }\n",
-    "    url = f'https://service.azul.data.humancellatlas.org/index/files'\n",
-    "    response = requests.get(url, params=params)\n",
-    "\n",
-    "    # Check the response status code\n",
-    "    if response.status_code == 200:\n",
-    "        # Request was successful\n",
-    "        response_json = response.json()\n",
-    "        return response_json\n",
-    "    else:\n",
-    "        # An error occurred\n",
-    "        print(\"Error:\", response.status_code)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 74,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "\n",
-    "def get_file_list(response_json):\n",
-    "    # extract file-information into a list of dicts\n",
-    "    file_data = []  #list\n",
-    "    for hit in response_json['hits']:\n",
-    "        for file in hit['files']:\n",
-    "            file_dict = {\n",
-    "                'fileName':file['name'],\n",
-    "                'size':file['size'],\n",
-    "                'version':file['version'],\n",
-    "                'projectShortname':hit['projects'][0]['projectShortname'][0],\n",
-    "                'projectId':hit['projects'][0]['projectId'][0],\n",
-    "                'entryId':hit['entryId'],\n",
-    "                'contentDescription':file['contentDescription'][0],\n",
-    "                'url':file['url']\n",
-    "            }\n",
-    "            file_data.append(file_dict)\n",
-    "    file_data_with_index = [{\n",
-    "        'index': i,\n",
-    "        **file_dict\n",
-    "    } for i, file_dict in enumerate(file_data)]\n",
-    "    return file_data_with_index\n",
-    "\n",
-    "def print_file_table(response_json):\n",
-    "\n",
-    "    #file_data = get_file_list(response_json)\n",
-    "    file_data = response_json\n",
-    "    \n",
-    "    # create nested list out of file_data\n",
-    "    headers = [\"Index\", \"File Name\" , \"Size\", \"Version\", \"Project (shortname)\", \"ProjectID\", \n",
-    "               \"EntryId\", \"Content Description\"]\n",
-    "    table_data = [[\n",
-    "        file['index'],\n",
-    "        file['fileName'],\n",
-    "        file['size'],\n",
-    "        file['version'],\n",
-    "        file['projectShortname'],\n",
-    "        file['projectId'],\n",
-    "        file['entryId'],\n",
-    "        file['contentDescription'],\n",
-    "    ] for file in file_data]\n",
-    "\n",
-    "    table = tabulate(table_data, headers, tablefmt='fancy_grid')\n",
-    "    print(table)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 75,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "╒═════════╤═══════════════════════════════════════════╤════════════╤═════════════════════════════╤════════════════════════════╤══════════════════════════════════════╤══════════════════════════════════════╤═══════════════════════╕\n",
-      "│   Index │ File Name                                 │       Size │ Version                     │ Project (shortname)        │ ProjectID                            │ EntryId                              │ Content Description   │\n",
-      "╞═════════╪═══════════════════════════════════════════╪════════════╪═════════════════════════════╪════════════════════════════╪══════════════════════════════════════╪══════════════════════════════════════╪═══════════════════════╡\n",
-      "│       0 │ 098cc66a-d806-42db-a1c8-fa99a0317d7c.loom │  854692581 │ 2021-02-03T19:43:20.000000Z │ HumanTissueTcellActivation │ 4a95101c-9ffc-4f30-a809-f04518a23803 │ 131ea511-25f7-5801-993f-bfa25f8ca68d │ Count Matrix          │\n",
-      "├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤\n",
-      "│       1 │ 294fe5d9-c1e8-4670-80d3-4c2b0a5e33c1.loom │ 1530326527 │ 2021-02-03T19:51:58.000000Z │ HumanTissueTcellActivation │ 4a95101c-9ffc-4f30-a809-f04518a23803 │ 7848d80b-6b1d-56b5-b19a-9639e3c4efbe │ Count Matrix          │\n",
-      "├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤\n",
-      "│       2 │ d6536459-ab4e-4954-a0ce-5e6d07670039.loom │  938504115 │ 2021-02-03T19:44:43.000000Z │ HumanTissueTcellActivation │ 4a95101c-9ffc-4f30-a809-f04518a23803 │ b98cfaac-64f5-59f5-b42e-209186812c19 │ Count Matrix          │\n",
-      "├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤\n",
-      "│       3 │ a040dae6-e0b1-49cf-a9ee-9793d5ad7d9c.loom │ 1478984890 │ 2021-02-03T19:49:15.000000Z │ HumanTissueTcellActivation │ 4a95101c-9ffc-4f30-a809-f04518a23803 │ c7b6470c-e2f0-5141-a8a2-11eb0984689a │ Count Matrix          │\n",
-      "├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤\n",
-      "│       4 │ t-cell-activation-human-lung-10XV2.loom   │  395054566 │ 2021-02-10T18:04:33.000000Z │ HumanTissueTcellActivation │ 4a95101c-9ffc-4f30-a809-f04518a23803 │ d0b95f2c-98ae-582b-84f4-e2bd0c5a0adb │ Count Matrix          │\n",
-      "├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤\n",
-      "│       5 │ 0f14c412-5014-4ac0-9a71-858b2f047777.loom │  423142737 │ 2021-02-04T15:18:49.000000Z │ KidneySingleCellAtlas      │ abe1a013-af7a-45ed-8c26-f3793c24a1f4 │ 2b9f7c89-d1c2-53ef-a769-80fec2f7d9e6 │ Count Matrix          │\n",
-      "├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤\n",
-      "│       6 │ 37cad11b-c8c9-4d1f-b715-498b0f8d4b35.loom │ 1066947865 │ 2021-02-04T15:49:34.000000Z │ KidneySingleCellAtlas      │ abe1a013-af7a-45ed-8c26-f3793c24a1f4 │ 32c69d68-1792-53af-9f42-0e97c9afc94b │ Count Matrix          │\n",
-      "├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤\n",
-      "│       7 │ dc31f31d-ab56-4025-9834-99be638a2d50.loom │  745509487 │ 2021-02-04T15:34:25.000000Z │ KidneySingleCellAtlas      │ abe1a013-af7a-45ed-8c26-f3793c24a1f4 │ 38a0ef48-9df1-5fef-8eb6-b32fbb67aabd │ Count Matrix          │\n",
-      "├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤\n",
-      "│       8 │ afd0ea55-e710-4b46-bb05-2423e491b6f5.loom │  698042665 │ 2021-02-04T15:34:36.000000Z │ KidneySingleCellAtlas      │ abe1a013-af7a-45ed-8c26-f3793c24a1f4 │ 3cd78fb7-b7bc-5ab1-b122-47585f0023d4 │ Count Matrix          │\n",
-      "├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤\n",
-      "│       9 │ b3ce1085-08dc-42ff-a609-6968315327a8.loom │  425012253 │ 2021-02-04T15:30:01.000000Z │ KidneySingleCellAtlas      │ abe1a013-af7a-45ed-8c26-f3793c24a1f4 │ 44175006-91f3-5d95-9a08-b33e08ed1ae3 │ Count Matrix          │\n",
-      "╘═════════╧═══════════════════════════════════════════╧════════════╧═════════════════════════════╧════════════════════════════╧══════════════════════════════════════╧══════════════════════════════════════╧═══════════════════════╛\n",
-      "Downloading to: /home/fran/Documents/AAMasterDataScience/BigDataPraktikum/data\n"
-     ]
-    },
-    {
-     "ename": "IsADirectoryError",
-     "evalue": "[Errno 21] Is a directory: '/home/fran/Documents/AAMasterDataScience/BigDataPraktikum/data'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mIsADirectoryError\u001b[0m                         Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[75], line 12\u001b[0m\n\u001b[1;32m      9\u001b[0m catalog \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39mdcp26\u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m     10\u001b[0m save_location \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39m/home/fran/Documents/AAMasterDataScience/BigDataPraktikum/data\u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m---> 12\u001b[0m download_file(index, save_location,files_list)\n",
-      "Cell \u001b[0;32mIn[68], line 16\u001b[0m, in \u001b[0;36mdownload_file\u001b[0;34m(index, output_path, files_list)\u001b[0m\n\u001b[1;32m     13\u001b[0m total \u001b[39m=\u001b[39m \u001b[39mint\u001b[39m(response\u001b[39m.\u001b[39mheaders\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39mcontent-length\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m0\u001b[39m))\n\u001b[1;32m     14\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mDownloading to: \u001b[39m\u001b[39m{\u001b[39;00moutput_path\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m, flush\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[0;32m---> 16\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39;49m(output_path, \u001b[39m'\u001b[39;49m\u001b[39mwb\u001b[39;49m\u001b[39m'\u001b[39;49m) \u001b[39mas\u001b[39;00m f:\n\u001b[1;32m     17\u001b[0m     \u001b[39mwith\u001b[39;00m tqdm(total\u001b[39m=\u001b[39mtotal, unit\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mB\u001b[39m\u001b[39m'\u001b[39m, unit_scale\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m, unit_divisor\u001b[39m=\u001b[39m\u001b[39m1024\u001b[39m) \u001b[39mas\u001b[39;00m bar:\n\u001b[1;32m     18\u001b[0m         \u001b[39mfor\u001b[39;00m chunk \u001b[39min\u001b[39;00m response\u001b[39m.\u001b[39miter_content(chunk_size\u001b[39m=\u001b[39m\u001b[39m1024\u001b[39m):\n",
-      "File \u001b[0;32m~/.local/lib/python3.8/site-packages/IPython/core/interactiveshell.py:284\u001b[0m, in \u001b[0;36m_modified_open\u001b[0;34m(file, *args, **kwargs)\u001b[0m\n\u001b[1;32m    277\u001b[0m \u001b[39mif\u001b[39;00m file \u001b[39min\u001b[39;00m {\u001b[39m0\u001b[39m, \u001b[39m1\u001b[39m, \u001b[39m2\u001b[39m}:\n\u001b[1;32m    278\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m    279\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mIPython won\u001b[39m\u001b[39m'\u001b[39m\u001b[39mt let you open fd=\u001b[39m\u001b[39m{\u001b[39;00mfile\u001b[39m}\u001b[39;00m\u001b[39m by default \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m    280\u001b[0m         \u001b[39m\"\u001b[39m\u001b[39mas it is likely to crash IPython. If you know what you are doing, \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m    281\u001b[0m         \u001b[39m\"\u001b[39m\u001b[39myou can use builtins\u001b[39m\u001b[39m'\u001b[39m\u001b[39m open.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m    282\u001b[0m     )\n\u001b[0;32m--> 284\u001b[0m \u001b[39mreturn\u001b[39;00m io_open(file, \u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
-      "\u001b[0;31mIsADirectoryError\u001b[0m: [Errno 21] Is a directory: '/home/fran/Documents/AAMasterDataScience/BigDataPraktikum/data'"
-     ]
-    }
-   ],
-   "source": [
-    "# example workflow\n",
-    "# get file metadata \n",
-    "antwort = get_files_metadata()\n",
-    "# transform & print it as list\n",
-    "files_list = get_file_list(antwort)\n",
-    "print_file_table(files_list)\n",
-    "# specify which file(s) you want to download (provide index-list)\n",
-    "index = [6]\n",
-    "catalog = 'dcp26'\n",
-    "save_location = '/home/fran/Documents/AAMasterDataScience/BigDataPraktikum/data'\n",
-    "\n",
-    "download_file(index, save_location,files_list)\n"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Weiterführende Notizen\n",
-    "\n",
-    "https://www.askpython.com/python/examples/pull-data-from-an-api"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Loom Dateien: \n",
-    "\n",
-    "Umgang mit LoomPy: \n",
-    "\n",
-    "https://linnarssonlab.org/loompy/apiwalkthrough/index.html"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.10"
-  },
-  "orig_nbformat": 4
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
-%% Cell type:markdown id: tags:
-## Erste Spielereien
-### Geht das mit der API für uns?
-Probiere mal das Python-Package, das es gibt ...
-https://pypi.org/project/hca/
-Okay, fail: Das wird nicht mehr verwendet. Schade.
-%% Cell type:code id: tags:
-``` python
-import requests
-import os
-from tqdm import tqdm
-import json
-import pandas as pd
-from tabulate import tabulate
-```
-%% Cell type:code id: tags:
-``` python
-# Download the files selected by `index` from `files_list`, showing a progress bar per file.
-def download_file(index, output_path, files_list):
-    """Stream every selected file to disk.
-
-    Args:
-        index: iterable of positions into ``files_list``.
-        output_path: target directory or target file path. If it is an existing
-            directory, each file is stored inside it under its own ``'fileName'``;
-            any other value is opened as the file path itself (the original behaviour).
-        files_list: list of dicts providing ``'url'`` (and ``'fileName'`` when
-            ``output_path`` is a directory), e.g. the result of ``get_file_list``.
-
-    Raises:
-        requests.HTTPError: if the server answers with an error status.
-    """
-    for i in index:
-        entry = files_list[i]
-        url = entry['url'].replace('/fetch', '')  # Work around https://github.com/DataBiosphere/azul/issues/2908
-        # Passing a directory used to fail with IsADirectoryError in open(); resolve a real file path first.
-        if os.path.isdir(output_path):
-            target_path = os.path.join(output_path, entry['fileName'])
-        else:
-            target_path = output_path
-        # The context manager releases the streamed connection even if writing fails.
-        with requests.get(url, stream=True) as response:
-            response.raise_for_status()
-            total = int(response.headers.get('content-length', 0))
-            print(f'Downloading to: {target_path}', flush=True)
-            with open(target_path, 'wb') as f:
-                with tqdm(total=total, unit='B', unit_scale=True, unit_divisor=1024) as bar:
-                    for chunk in response.iter_content(chunk_size=1024):
-                        size = f.write(chunk)
-                        bar.update(size)
-```
-%% Cell type:code id: tags:
-``` python
-# Project to inspect and the Azul catalog that is queried for it.
-project_uuid = '4a95101c-9ffc-4f30-a809-f04518a23803'
-catalog = 'dcp26'
-endpoint_url = f'https://service.azul.data.humancellatlas.org/index/projects/{project_uuid}'
-# Download directory: defaults to the original local path, but can be overridden through the
-# HCA_SAVE_LOCATION environment variable so the notebook is not tied to one machine.
-save_location = os.environ.get('HCA_SAVE_LOCATION', '/home/fran/Documents/AAMasterDataScience/BigDataPraktikum/data')
-```
-%% Cell type:code id: tags:
-``` python
-# example downloading of a project
-def iterate_matrices_tree(tree, keys=()):
-    """Yield ``(keys, file_info)`` for every entry of a nested matrices tree.
-
-    This helper was called below but never defined in the notebook, so the cell
-    failed with NameError on a fresh kernel.
-    NOTE(review): assumes the tree consists of nested dicts whose leaves are lists
-    of file dicts (the loop below reads ``'url'`` and ``'name'`` from each) - confirm
-    against the service response.
-
-    Args:
-        tree: a dict (descended into), or a list (its items are yielded).
-        keys: tuple of dict keys that lead to ``tree``.
-
-    Raises:
-        TypeError: if a node is neither a dict nor a list.
-    """
-    if isinstance(tree, dict):
-        for key, subtree in tree.items():
-            yield from iterate_matrices_tree(subtree, keys=(*keys, key))
-    elif isinstance(tree, list):
-        for file_info in tree:
-            yield keys, file_info
-    else:
-        raise TypeError(f'unexpected node in matrices tree: {type(tree).__name__}')
-response = requests.get(endpoint_url, params={'catalog': catalog})
-response.raise_for_status()
-response_json = response.json()
-project = response_json['projects'][0]
-# Remember every URL already handled so a file listed under both keys is processed once.
-file_urls = set()
-for key in ('matrices', 'contributedAnalyses'):
-    tree = project[key]
-    for path, file_info in iterate_matrices_tree(tree):
-        url = file_info['url']
-        if url not in file_urls:
-            dest_path = os.path.join(save_location, file_info['name'])
-            # TODO to really download the data, fetch `url` into `dest_path` here. Note that
-            # download_file(index, output_path, files_list) from this notebook expects an
-            # index list plus the file list, not a single URL.
-            file_urls.add(url)
-print('Downloads Complete.')
-```
-%% Output
-    Downloads Complete.
-%% Cell type:code id: tags:
-``` python
-# function gets meta data of files (loom files by default)
-def get_files_metadata(catalog='dcp26', file_formats=('loom', 'loom.gz'), size=10,
-                       sort='lastModifiedDate', order='asc'):
-    """Query the ``index/files`` endpoint and return the decoded JSON response.
-
-    Called without arguments it sends the same query as before; every part of
-    the query can now be overridden.
-
-    Args:
-        catalog: value of the ``catalog`` query parameter.
-        file_formats: iterable of file formats for the ``fileFormat`` filter.
-        size: value of the ``size`` query parameter.
-        sort: value of the ``sort`` query parameter.
-        order: value of the ``order`` query parameter.
-
-    Returns:
-        The parsed JSON body on HTTP 200; otherwise ``None`` after printing the
-        status code (callers must check for ``None``).
-    """
-    params = {
-        'catalog': catalog,
-        # Build the filter with json.dumps instead of a hand-written JSON string.
-        'filters': json.dumps({'fileFormat': {'is': list(file_formats)}}),
-        'size': size,
-        'sort': sort,
-        'order': order,
-    }
-    url = 'https://service.azul.data.humancellatlas.org/index/files'
-    response = requests.get(url, params=params)
-    if response.status_code != 200:
-        # An error occurred: report it and make the "no result" return value explicit.
-        print("Error:", response.status_code)
-        return None
-    return response.json()
-```
-%% Cell type:code id: tags:
-``` python
-def get_file_list(response_json):
-    """Flatten the hits of a response into one list of indexed file records.
-
-    Args:
-        response_json: mapping with a ``'hits'`` list; each hit provides
-            ``'files'``, ``'projects'`` and ``'entryId'``.
-
-    Returns:
-        A list of dicts with the keys ``index``, ``fileName``, ``size``,
-        ``version``, ``projectShortname``, ``projectId``, ``entryId``,
-        ``contentDescription`` and ``url``; ``index`` counts the files from 0
-        across all hits.
-    """
-    records = []
-    for hit in response_json['hits']:
-        for entry in hit['files']:
-            # len(records) is read before the append, so it is this record's position.
-            records.append({
-                'index': len(records),
-                'fileName': entry['name'],
-                'size': entry['size'],
-                'version': entry['version'],
-                'projectShortname': hit['projects'][0]['projectShortname'][0],
-                'projectId': hit['projects'][0]['projectId'][0],
-                'entryId': hit['entryId'],
-                'contentDescription': entry['contentDescription'][0],
-                'url': entry['url'],
-            })
-    return records
-def print_file_table(response_json):
-    """Print the given file records as a ``fancy_grid`` table.
-
-    Args:
-        response_json: despite the name, this is iterated directly as the list of
-            file records (dicts with the keys listed in ``columns`` below), not a
-            raw service response.
-    """
-    # (column title, record key) pairs, in display order.
-    columns = (
-        ("Index", 'index'),
-        ("File Name", 'fileName'),
-        ("Size", 'size'),
-        ("Version", 'version'),
-        ("Project (shortname)", 'projectShortname'),
-        ("ProjectID", 'projectId'),
-        ("EntryId", 'entryId'),
-        ("Content Description", 'contentDescription'),
-    )
-    headers = [title for title, _ in columns]
-    rows = []
-    for record in response_json:
-        rows.append([record[field] for _, field in columns])
-    print(tabulate(rows, headers, tablefmt='fancy_grid'))
-```
-%% Cell type:code id: tags:
-``` python
-# example workflow
-# get file metadata
-antwort = get_files_metadata()
-# transform & print it as list
-files_list = get_file_list(antwort)
-print_file_table(files_list)
-# specify which file(s) you want to download (provide index-list)
-index = [6]
-catalog = 'dcp26'
-save_location = '/home/fran/Documents/AAMasterDataScience/BigDataPraktikum/data'
-# download_file opens `output_path` as a file, so pass one full file path per selected
-# entry; passing the bare directory raised IsADirectoryError.
-for i in index:
-    download_file([i], os.path.join(save_location, files_list[i]['fileName']), files_list)
-```
-%% Output
-    ╒═════════╤═══════════════════════════════════════════╤════════════╤═════════════════════════════╤════════════════════════════╤══════════════════════════════════════╤══════════════════════════════════════╤═══════════════════════╕
-    │   Index │ File Name                                 │       Size │ Version                     │ Project (shortname)        │ ProjectID                            │ EntryId                              │ Content Description   │
-    ╞═════════╪═══════════════════════════════════════════╪════════════╪═════════════════════════════╪════════════════════════════╪══════════════════════════════════════╪══════════════════════════════════════╪═══════════════════════╡
-    │       0 │ 098cc66a-d806-42db-a1c8-fa99a0317d7c.loom │  854692581 │ 2021-02-03T19:43:20.000000Z │ HumanTissueTcellActivation │ 4a95101c-9ffc-4f30-a809-f04518a23803 │ 131ea511-25f7-5801-993f-bfa25f8ca68d │ Count Matrix          │
-    ├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤
-    │       1 │ 294fe5d9-c1e8-4670-80d3-4c2b0a5e33c1.loom │ 1530326527 │ 2021-02-03T19:51:58.000000Z │ HumanTissueTcellActivation │ 4a95101c-9ffc-4f30-a809-f04518a23803 │ 7848d80b-6b1d-56b5-b19a-9639e3c4efbe │ Count Matrix          │
-    ├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤
-    │       2 │ d6536459-ab4e-4954-a0ce-5e6d07670039.loom │  938504115 │ 2021-02-03T19:44:43.000000Z │ HumanTissueTcellActivation │ 4a95101c-9ffc-4f30-a809-f04518a23803 │ b98cfaac-64f5-59f5-b42e-209186812c19 │ Count Matrix          │
-    ├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤
-    │       3 │ a040dae6-e0b1-49cf-a9ee-9793d5ad7d9c.loom │ 1478984890 │ 2021-02-03T19:49:15.000000Z │ HumanTissueTcellActivation │ 4a95101c-9ffc-4f30-a809-f04518a23803 │ c7b6470c-e2f0-5141-a8a2-11eb0984689a │ Count Matrix          │
-    ├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤
-    │       4 │ t-cell-activation-human-lung-10XV2.loom   │  395054566 │ 2021-02-10T18:04:33.000000Z │ HumanTissueTcellActivation │ 4a95101c-9ffc-4f30-a809-f04518a23803 │ d0b95f2c-98ae-582b-84f4-e2bd0c5a0adb │ Count Matrix          │
-    ├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤
-    │       5 │ 0f14c412-5014-4ac0-9a71-858b2f047777.loom │  423142737 │ 2021-02-04T15:18:49.000000Z │ KidneySingleCellAtlas      │ abe1a013-af7a-45ed-8c26-f3793c24a1f4 │ 2b9f7c89-d1c2-53ef-a769-80fec2f7d9e6 │ Count Matrix          │
-    ├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤
-    │       6 │ 37cad11b-c8c9-4d1f-b715-498b0f8d4b35.loom │ 1066947865 │ 2021-02-04T15:49:34.000000Z │ KidneySingleCellAtlas      │ abe1a013-af7a-45ed-8c26-f3793c24a1f4 │ 32c69d68-1792-53af-9f42-0e97c9afc94b │ Count Matrix          │
-    ├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤
-    │       7 │ dc31f31d-ab56-4025-9834-99be638a2d50.loom │  745509487 │ 2021-02-04T15:34:25.000000Z │ KidneySingleCellAtlas      │ abe1a013-af7a-45ed-8c26-f3793c24a1f4 │ 38a0ef48-9df1-5fef-8eb6-b32fbb67aabd │ Count Matrix          │
-    ├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤
-    │       8 │ afd0ea55-e710-4b46-bb05-2423e491b6f5.loom │  698042665 │ 2021-02-04T15:34:36.000000Z │ KidneySingleCellAtlas      │ abe1a013-af7a-45ed-8c26-f3793c24a1f4 │ 3cd78fb7-b7bc-5ab1-b122-47585f0023d4 │ Count Matrix          │
-    ├─────────┼───────────────────────────────────────────┼────────────┼─────────────────────────────┼────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┤
-    │       9 │ b3ce1085-08dc-42ff-a609-6968315327a8.loom │  425012253 │ 2021-02-04T15:30:01.000000Z │ KidneySingleCellAtlas      │ abe1a013-af7a-45ed-8c26-f3793c24a1f4 │ 44175006-91f3-5d95-9a08-b33e08ed1ae3 │ Count Matrix          │
-    ╘═════════╧═══════════════════════════════════════════╧════════════╧═════════════════════════════╧════════════════════════════╧══════════════════════════════════════╧══════════════════════════════════════╧═══════════════════════╛
-    Downloading to: /home/fran/Documents/AAMasterDataScience/BigDataPraktikum/data
-    ---------------------------------------------------------------------------
-    IsADirectoryError                         Traceback (most recent call last)
-Cell     In[75], line 12
-          9 catalog = 'dcp26'
-         10 save_location = '/home/fran/Documents/AAMasterDataScience/BigDataPraktikum/data'
-    ---> 12 download_file(index, save_location,files_list)
-Cell     In[68], line 16, in download_file(index, output_path, files_list)
-         13 total = int(response.headers.get('content-length', 0))
-         14 print(f'Downloading to: {output_path}', flush=True)
-    ---> 16 with open(output_path, 'wb') as f:
-         17     with tqdm(total=total, unit='B', unit_scale=True, unit_divisor=1024) as bar:
-         18         for chunk in response.iter_content(chunk_size=1024):
-File     ~/.local/lib/python3.8/site-packages/IPython/core/interactiveshell.py:284, in _modified_open(file, *args, **kwargs)
-        277 if file in {0, 1, 2}:
-        278     raise ValueError(
-        279         f"IPython won't let you open fd={file} by default "
-        280         "as it is likely to crash IPython. If you know what you are doing, "
-        281         "you can use builtins' open."
-        282     )
-    --> 284 return io_open(file, *args, **kwargs)
-    IsADirectoryError: [Errno 21] Is a directory: '/home/fran/Documents/AAMasterDataScience/BigDataPraktikum/data'
-%% Cell type:markdown id: tags:
-## Weiterführende Notizen
-https://www.askpython.com/python/examples/pull-data-from-an-api
-%% Cell type:markdown id: tags:
-Loom Dateien:
-Umgang mit LoomPy:
-https://linnarssonlab.org/loompy/apiwalkthrough/index.html