|
5 | 5 | data or metadata in Synapse. |
6 | 6 | """ |
7 | 7 |
|
| 8 | +import asyncio |
| 9 | +import os |
8 | 10 | from dataclasses import dataclass, field, replace |
9 | 11 | from typing import Any, AsyncGenerator, Dict, Generator, Optional, Protocol, Union |
10 | 12 |
|
|
28 | 30 | from synapseclient.core.constants.concrete_types import ( |
29 | 31 | CREATE_GRID_REQUEST, |
30 | 32 | FILE_BASED_METADATA_TASK_PROPERTIES, |
| 33 | + GRID_CSV_IMPORT_REQUEST, |
31 | 34 | GRID_RECORD_SET_EXPORT_REQUEST, |
32 | 35 | LIST_GRID_SESSIONS_REQUEST, |
33 | 36 | LIST_GRID_SESSIONS_RESPONSE, |
34 | 37 | RECORD_BASED_METADATA_TASK_PROPERTIES, |
35 | 38 | ) |
| 39 | +from synapseclient.core.upload.multipart_upload_async import multipart_upload_file_async |
36 | 40 | from synapseclient.core.utils import delete_none_keys, merge_dataclass_entities |
37 | 41 | from synapseclient.models.mixins.asynchronous_job import AsynchronousCommunicator |
38 | 42 | from synapseclient.models.recordset import ValidationSummary |
@@ -1078,6 +1082,88 @@ def to_synapse_request(self) -> Dict[str, Any]: |
1078 | 1082 | return request_dict |
1079 | 1083 |
|
1080 | 1084 |
|
@dataclass
class GridCsvImportRequest(AsynchronousCommunicator):
    """
    A request to import a CSV file into an existing grid session.

    Represents a [Synapse GridCsvImportRequest](https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/grid/GridCsvImportRequest.html).

    Attributes:
        concrete_type: The concrete type for the request
        session_id: The grid session ID to import the CSV into
        file_handle_id: The file handle ID of the CSV file to import
    """

    concrete_type: str = GRID_CSV_IMPORT_REQUEST
    """The concrete type for the request"""

    session_id: Optional[str] = None
    """The grid session ID to import the CSV into"""

    file_handle_id: Optional[str] = None
    """The file handle ID of the CSV file to import"""

    _grid_session_data: Optional[Dict[str, Any]] = field(default=None, compare=False)
    """Internal storage of the full grid session data from the response for later use."""

    def fill_from_dict(
        self, synapse_response: Union[Dict[str, Any], Any]
    ) -> "GridCsvImportRequest":
        """
        Converts a response from the REST API into this dataclass.

        The `gridSession` object of the response is stored on the instance so
        that `fill_grid_session_from_response` can later copy it onto a Grid.
        A missing or null `gridSession` is treated as an empty object, and the
        current `session_id` is kept when the response does not provide one.

        Arguments:
            synapse_response: The response from the REST API.

        Returns:
            The GridCsvImportRequest object.
        """
        # `.get(key, {})` only substitutes the default for a *missing* key; an
        # explicit JSON null would come back as None and break the lookups below.
        grid_session_data = synapse_response.get("gridSession") or {}

        # Do not discard the session ID this request was built with just
        # because the response did not echo it back.
        response_session_id = grid_session_data.get("sessionId", None)
        if response_session_id is not None:
            self.session_id = response_session_id

        self._grid_session_data = grid_session_data
        return self

    def fill_grid_session_from_response(self, grid_session: "Grid") -> "Grid":
        """
        Fills a Grid object with data from the stored response.

        When no grid session data has been stored (or it is empty) the Grid is
        returned untouched. Otherwise every attribute listed below is
        overwritten, using None for keys absent from the stored data.

        Arguments:
            grid_session: The Grid object to populate.

        Returns:
            The populated Grid object.
        """
        if not self._grid_session_data:
            return grid_session

        data = self._grid_session_data
        grid_session.session_id = data.get("sessionId", None)
        grid_session.started_by = data.get("startedBy", None)
        grid_session.started_on = data.get("startedOn", None)
        grid_session.etag = data.get("etag", None)
        grid_session.modified_on = data.get("modifiedOn", None)
        grid_session.last_replica_id_client = data.get("lastReplicaIdClient", None)
        grid_session.last_replica_id_service = data.get("lastReplicaIdService", None)
        grid_session.grid_json_schema_id = data.get("gridJsonSchema$Id", None)
        grid_session.source_entity_id = data.get("sourceEntityId", None)
        return grid_session

    def to_synapse_request(self) -> Dict[str, Any]:
        """
        Converts this dataclass to a dictionary suitable for a Synapse REST API request.

        `concreteType` is always included; `sessionId` and `fileHandleId` are
        only included when they are not None.

        Returns:
            A dictionary representation of this object for API requests.
        """
        request_dict: Dict[str, Any] = {"concreteType": self.concrete_type}
        if self.session_id is not None:
            request_dict["sessionId"] = self.session_id
        if self.file_handle_id is not None:
            request_dict["fileHandleId"] = self.file_handle_id
        return request_dict
| 1165 | + |
| 1166 | + |
1081 | 1167 | @dataclass |
1082 | 1168 | class GridSession: |
1083 | 1169 | """ |
@@ -1427,6 +1513,66 @@ def list( |
1427 | 1513 | ``` |
1428 | 1514 | """ |
1429 | 1515 |
|
def import_csv(
    self,
    file_handle_id: Optional[str] = None,
    local_path: Optional[str] = None,
    *,
    timeout: int = 120,
    synapse_client: Optional[Synapse] = None,
) -> "Grid":
    """
    Load the contents of a CSV file into this grid session, populating or
    updating the grid data.

    The CSV can be referenced by an existing file handle or by a path on the
    local machine. If both are supplied, the asynchronous implementation
    (`import_csv_async`) uploads `local_path` and imports the resulting file
    handle instead of `file_handle_id`.

    Arguments:
        file_handle_id: The file handle ID of the CSV file to import. Either
            this or `local_path` must be provided.
        local_path: Path to a local CSV file to upload and import. Either this
            or `file_handle_id` must be provided. The file will be uploaded
            automatically before the import.
        timeout: The number of seconds to wait for the job to complete or progress
            before raising a SynapseTimeoutError. Defaults to 120.
        synapse_client: If not passed in and caching was not disabled by
            `Synapse.allow_client_caching(False)` this will use the last created
            instance from the Synapse class constructor.

    Returns:
        Grid: The Grid object with updated session data after the import.

    Raises:
        ValueError: If `session_id` is not set.
        ValueError: If neither `file_handle_id` nor `local_path` is provided.

    Example: Import a CSV via file handle ID

        ```python
        from synapseclient import Synapse
        from synapseclient.models import Grid

        syn = Synapse()
        syn.login()

        grid = Grid(session_id="abc-123-def")
        grid = grid.import_csv(file_handle_id="12345678")
        ```

    Example: Import a CSV from a local file

        ```python
        from synapseclient import Synapse
        from synapseclient.models import Grid

        syn = Synapse()
        syn.login()

        grid = Grid(session_id="abc-123-def")
        grid = grid.import_csv(local_path="/path/to/data.csv")
        ```
    """
    # Placeholder body. NOTE(review): presumably the real work is done by
    # `import_csv_async` through the sync wrapper — confirm against the class.
    return self
| 1575 | + |
1430 | 1576 |
|
1431 | 1577 | @dataclass |
1432 | 1578 | @async_to_sync |
@@ -1694,6 +1840,90 @@ async def main(): |
1694 | 1840 |
|
1695 | 1841 | return self |
1696 | 1842 |
|
async def import_csv_async(
    self,
    file_handle_id: Optional[str] = None,
    local_path: Optional[str] = None,
    *,
    timeout: int = 120,
    synapse_client: Optional[Synapse] = None,
) -> "Grid":
    """
    Load the contents of a CSV file into this grid session, populating or
    updating the grid data.

    When `local_path` is given the file is uploaded first and the resulting
    file handle is imported; in that case any `file_handle_id` passed
    alongside it is ignored. The session fields returned by the import job
    are then copied onto this Grid.

    Arguments:
        file_handle_id: The file handle ID of the CSV file to import. Either
            this or `local_path` must be provided.
        local_path: Path to a local CSV file to upload and import. Either this
            or `file_handle_id` must be provided. A leading `~` is expanded
            and the file is uploaded automatically before the import.
        timeout: The number of seconds to wait for the job to complete or progress
            before raising a SynapseTimeoutError. Defaults to 120.
        synapse_client: If not passed in and caching was not disabled by
            `Synapse.allow_client_caching(False)` this will use the last created
            instance from the Synapse class constructor.

    Returns:
        Grid: The Grid object with updated session data after the import.

    Raises:
        ValueError: If `session_id` is not set.
        ValueError: If neither `file_handle_id` nor `local_path` is provided.

    Example: Import a CSV from a local file asynchronously

        ```python
        import asyncio
        from synapseclient import Synapse
        from synapseclient.models import Grid

        syn = Synapse()
        syn.login()

        async def main():
            grid = Grid(session_id="abc-123-def")
            grid = await grid.import_csv_async(local_path="/path/to/data.csv")
            print(f"Import complete, session etag: {grid.etag}")

        asyncio.run(main())
        ```
    """
    # Validate inputs before touching the tracer or the network.
    if not self.session_id:
        raise ValueError("session_id is required to import CSV into a GridSession")
    if not (file_handle_id or local_path):
        raise ValueError(
            "Either file_handle_id or local_path must be provided to import CSV"
        )

    span_attributes = {"synapse.session_id": self.session_id or ""}
    trace.get_current_span().set_attributes(span_attributes)

    csv_file_handle_id = file_handle_id
    if local_path:
        # A local file takes precedence: upload it and import the new handle.
        syn = Synapse.get_client(synapse_client=synapse_client)
        transfer_slot = syn._get_parallel_file_transfer_semaphore(
            asyncio_event_loop=asyncio.get_running_loop()
        )
        async with transfer_slot:
            csv_file_handle_id = await multipart_upload_file_async(
                syn=syn,
                file_path=os.path.expanduser(local_path),
            )

    completed_job = await GridCsvImportRequest(
        session_id=self.session_id,
        file_handle_id=csv_file_handle_id,
    ).send_job_and_wait_async(timeout=timeout, synapse_client=synapse_client)

    # Copy the session fields returned by the job onto this Grid.
    completed_job.fill_grid_session_from_response(self)
    return self
| 1926 | + |
1697 | 1927 | def fill_from_dict(self, synapse_response: Dict[str, Any]) -> "Grid": |
1698 | 1928 | """Converts a response from the REST API into this dataclass.""" |
1699 | 1929 | self.session_id = synapse_response.get("sessionId", None) |
|
0 commit comments