Fixed issues

JMante1 · JMante1 · commit fd970911a1bd · 2020-09-10T20:49:06.000-06:00
Fixed column hard-coding issues and switched to using temporary files wherever output is written to the hard disk
diff --git a/app.py b/app.py
@@ -11,7 +11,7 @@
 from most_used_by_type_bar import most_used_by_type_bar
 from toggle_bars import toggle_bars
 
-import os
+import tempfile, os, shutil
 
 app = Flask(__name__)
 
@@ -59,9 +59,6 @@ def Sankey_Run():
         #instance_ur = 'https://synbiohub.org/'
         #url = 'https://synbiohub.org/public/igem/BBa_B0012/1'
         #top_level_url = 'https://dev.synbiohub.org/public/igem/BBa_B0012/1'
-        
-        #get current working directory
-        cwd = os.getcwd()
 
         #retrieve information about the poi
         self_df, display_id, title, role, count = input_data(top_level_url, instance_url)
@@ -74,18 +71,22 @@ def Sankey_Run():
         df_sankey = sankey(url, top_level_url, title, instance_url)
 
         sankey_title = "Parts Co-Located with "+ title + " (a "+role_link+")"
-        filename= os.path.join(cwd, f'sankey_{display_id}_.html')
+        
+        #create a temporary directory
+        temp_dir = tempfile.TemporaryDirectory()
+        
+        #name file
+        filename = os.path.join(temp_dir.name, "Sankey.html")
 
+        
         #create the sankey diagram
         sankey_graph(filename, df_sankey, 'Node, Label',
                     'Link', 'Color', 'Source','Target', 'Value',
                     'Link Color', sankey_title, url_not_name=False) 
+        
 
         #obtain the html from the sankey diagram
         result = retrieve_html(filename)
-
-        #delete the copy of the sankey diagram on the server
-        os.remove(filename)
        
         return result 
     except Exception as e:
@@ -133,9 +134,6 @@ def Bar_Run():
     url = complete_sbol.replace('/sbol','')
 
     try:
-        
-        #current working directory
-        cwd = os.getcwd()
 
         #create input data
         self_df, display_id, title, role, count = input_data(top_level_url, instance_url)
@@ -147,39 +145,36 @@ def Bar_Run():
         #graph title for most used barchart
         graph_title = f'Top Ten Parts by Number of Uses Compared to <a href="{url}" target="_blank">{title}</a>'
 
-        #where to save the file
-        filename1= os.path.join(cwd, f'bar1_{display_id}.html')
+        #create a temporary directory
+        temp_dir = tempfile.TemporaryDirectory()
+        
+        #name file
+        filename1 = os.path.join(temp_dir.name, "Most_Used.html")
 
         #create the most used barchart
         bar_plot('title','count','color',bar_df, graph_title, filename1, 'deff')
 
         #retrieve html
         most_used = retrieve_html(filename1)
 
-        #remove file
-        os.remove(filename1)
-
         #find poi role ontology link
         role_link = find_role_name(role, plural = False)
 
         bar_df = most_used_by_type_bar(top_level_url,instance_url, display_id, title, 
                       role, count)
-
+        
         #graph title for most used barchart
         graph_title = f'Top Ten {role_link} by Number of Uses Compared to <a href="{url}" target="_blank">{title}</a>'
 
-        #where to save the file
-        filename2= os.path.join(cwd, f'bar2_{display_id}.html')
-
+        #name file
+        filename2 = os.path.join(temp_dir.name, "Most_Used_Type.html")
+       
         #create the most used barchart
         bar_plot('title','count','color',bar_df, graph_title, filename2, 'deff')
 
         #retrieve html
         by_role = retrieve_html(filename2)
 
-        #remove file
-        os.remove(filename2)
-
         #create bar toggle html
         toggle_display = toggle_bars(most_used,by_role)
 
diff --git a/input_data.py b/input_data.py
@@ -1,6 +1,6 @@
 import json
 import requests
-from pandas.io.json import json_normalize
+from pandas import json_normalize
 
 def input_data(uri, instance):
     """
@@ -71,11 +71,10 @@ def input_data(uri, instance):
         #format responses
         d = json.loads(r.text)
         a = json_normalize(d['results']['bindings'])
-
-        #renames columns from ['count.datatype', 'count.type', 'count.value', 'def.type', 'def.value',
-        #   'displayId.type', 'displayId.value', 'role.type', 'role.value',
-        #   'title.type', 'title.value']
-        a.columns = ['cd', 'ct','count', 'dt', 'deff', 'dist', 'displayId','rt', 'roletog', 'tt', 'title']
+        
+        #renames columns
+        rename_dict = {'count.datatype':'cd', 'count.type':'ct', 'count.value':'count', 'def.type':'dt', 'def.value':'deff', 'displayId.type':'dist', 'displayId.value':'displayId', 'role.type':'rt', 'role.value':'roletog', 'title.type':'tt', 'title.value':'title'}
+        a.columns = [rename_dict[col] for col in a.columns]
         
         #split column roletog at SO: to leave the http://identifiers.org/so in the column http
         #and the roler number (e.g. 0000141) in the column role
diff --git a/most_used_bar.py b/most_used_bar.py
@@ -1,7 +1,7 @@
 import pandas as pd
 import requests
 import json
-from pandas.io.json import json_normalize
+from pandas import json_normalize
 from uri_to_url import uri_to_url
 
 def most_used_bar(uri, instance, display_id, title, role, count):
@@ -14,7 +14,7 @@ def most_used_bar(uri, instance, display_id, title, role, count):
     import pandas as pd
     import requests
     import json
-    from pandas.io.json import json_normalize
+    from pandas import json_normalize
     from uri_to_url import uri_to_url
     Most_Used_Query.txt
     
@@ -84,11 +84,10 @@ def most_used_bar(uri, instance, display_id, title, role, count):
     d = json.loads(r.text)
     bar_df = json_normalize(d['results']['bindings'])
     
-    #rename columns from ['count.datatype', 'count.type', 'count.value', 
-    #        'def.type', 'def.value', 'displayId.type', 'displayId.value', 
-    #        'role.type', 'role.value', 'title.type', 'title.value']
-    bar_df.columns = ['cd', 'ct','count', 'dt', 'deff', 'dist', 'displayId',
-                      'rt', 'roletog', 'tt', 'title']
+    
+    #rename columns
+    rename_dict = {'count.datatype':'cd', 'count.type':'ct', 'count.value':'count', 'def.type':'dt', 'def.value':'deff', 'displayId.type':'dist', 'displayId.value':'displayId', 'role.type':'rt', 'role.value':'roletog', 'title.type':'tt', 'title.value':'title'}
+    bar_df.columns = [rename_dict[col] for col in bar_df.columns]
     
     #drop unneeded columns
     bar_df = bar_df.drop(['cd', 'ct', 'dt', 'dist', 'rt', 'tt'], axis=1)
@@ -109,9 +108,10 @@ def most_used_bar(uri, instance, display_id, title, role, count):
     bar_df['deff'] = uri_to_url(bar_df['deff'], instance, spoofed_instance)
 
     #change the final row in the dataframe (usually row 11)
-    #to contain the information about the poi
-    bar_df.iloc[robustness] = [count,part_url,display_id,
-               "http://identifiers.org/so/SO:"+str(role),title]
+    #poi row is added like this so the ordering of the columns doesn't have to match
+    poi_row = pd.DataFrame.from_dict({'displayId':[display_id], 'title':[title], 'count':[count],
+                 'roletog':[f"http://identifiers.org/so/SO:{str(role)}"], 'deff':[part_url]})
+    bar_df.iloc[robustness] = poi_row.iloc[0]
     
     #define what colour each role should get (other is ignored)
     colormap = {
diff --git a/most_used_by_type_bar.py b/most_used_by_type_bar.py
@@ -1,7 +1,7 @@
 import pandas as pd
 import requests
 import json
-from pandas.io.json import json_normalize
+from pandas import json_normalize
 from uri_to_url import uri_to_url
 
 def most_used_by_type_bar(uri, instance, display_id, title, role, count):
@@ -15,7 +15,7 @@ def most_used_by_type_bar(uri, instance, display_id, title, role, count):
     import pandas as pd
     import requests
     import json
-    from pandas.io.json import json_normalize
+    from pandas import json_normalize
     Most_Used_By_Type_Query.txt
     
     Parameters
@@ -87,12 +87,11 @@ def most_used_by_type_bar(uri, instance, display_id, title, role, count):
     d = json.loads(r.text)
     bar_df = json_normalize(d['results']['bindings'])
     
-    #rename the columns from ['count.datatype', 'count.type', 'count.value',
-    #        'def.type', 'def.value', 'displayId.type', 'displayId.value', 
-    #        'role.type', 'role.value', 'title.type', 'title.value']
-    bar_df.columns = ['cd', 'ct','count', 'dt', 'deff', 'dist', 'displayId',
-                       'rt', 'roletog', 'tt', 'title']
     
+    #rename columns
+    rename_dict = {'count.datatype':'cd', 'count.type':'ct', 'count.value':'count', 'def.type':'dt', 'def.value':'deff', 'displayId.type':'dist', 'displayId.value':'displayId', 'role.type':'rt', 'role.value':'roletog', 'title.type':'tt', 'title.value':'title'}
+    bar_df.columns = [rename_dict[col] for col in bar_df.columns]
+     
     #drop unneeded columns
     bar_df = bar_df.drop(['cd', 'ct', 'dt', 'dist', 'rt', 'tt'], axis=1)
     
@@ -110,11 +109,13 @@ def most_used_by_type_bar(uri, instance, display_id, title, role, count):
     
     #replace uris with urls
     bar_df['deff'] = uri_to_url(bar_df['deff'], instance, spoofed_instance)
-        
+    
     #change the final row in the dataframe (usually row 11)
-    #to contain the information about the poi
-    bar_df.iloc[robustness] = [count,part_url,display_id,
-               "http://identifiers.org/so/SO:"+str(role),title]
+    #poi row is added like this so the ordering of the columns doesn't have to match
+    poi_row = pd.DataFrame.from_dict({'displayId':[display_id], 'title':[title], 'count':[count],
+                 'roletog':[f"http://identifiers.org/so/SO:{str(role)}"], 'deff':[part_url]})
+    bar_df.iloc[robustness] = poi_row.iloc[0]
+
     
     #define what colour each role should get (other is ignored)
     colormap = {
diff --git a/sankey.py b/sankey.py
@@ -1,7 +1,7 @@
 import requests
 import json
 import pandas as pd
-from pandas.io.json import json_normalize
+from pandas import json_normalize
 from uri_to_url import uri_to_url
 
 def sankey(url, uri, title, instance):
@@ -64,13 +64,10 @@ def sankey(url, uri, title, instance):
     d = json.loads(r.text)
     order_df = json_normalize(d['results']['bindings'])
     
-    #rename columns from ['average_preceeding.datatype', 'average_preceeding.type',
-    #       'average_preceeding.value', 'count.datatype', 'count.type',
-    #       'count.value', 'def2.type', 'def2.value', 'displayId.type',
-    #       'displayId.value', 'role.type', 'role.value', 'title.type',
-    #      'title.value']
-    order_df.columns = ['ad', 'at','centfol', 'cd', 'ct', 'count', 'dt','deff', 'dt1', 'displayId','rt', 'roletog', 'tt','title']
-    
+    #rename columns
+    rename_dict = {'average_preceeding.datatype':'ad', 'average_preceeding.type':'at', 'average_preceeding.value':'centfol', 'count.datatype':'cd', 'count.type':'ct', 'count.value':'count', 'def2.type':'dt', 'def2.value':'deff', 'displayId.type':'dt1', 'displayId.value':'displayId', 'role.type':'rt', 'role.value':'roletog', 'title.type':'tt', 'title.value':'title'}
+    order_df.columns = [rename_dict[col] for col in order_df.columns]
+
     #drop unneeded columns
     order_df = order_df.drop(['ad', 'at', 'cd', 'ct', 'dt', 'dt1', 'rt', 'tt'], axis=1)
     
@@ -85,6 +82,7 @@ def sankey(url, uri, title, instance):
     #'roletog' - role of the part (e.g. http://identifiers.org/so/SO:0000141)
     #'title' - human name of the part
     """
+    
     #change number columns from strings to number type
     order_df['count'] = order_df['count'].apply(pd.to_numeric)
     order_df['centfol'] = order_df['centfol'].apply(pd.to_numeric)
diff --git a/uri_to_url.py b/uri_to_url.py
@@ -43,7 +43,7 @@ def uri_to_url(data, instance, spoofed_instance):
     2    https://dev.synbiohub.org/public/igem/BBa_C0040/1
     """
     #checks if any changes need to be made
-    if spoofed_insance != instance:
+    if spoofed_instance != instance:
         
         #finds the data type of the input data
         data_type = type(data)